''' ######################################################################### # File : parser.py # Author: mbinary # Mail: zhuheqin1@gmail.com # Blog: https://mbinary.xyz # Github: https://github.com/mbinary # Created Time: 2018-11-04 19:50 # Description: ######################################################################### ''' import sys import argparse from math import e,pi,log from random import randint from functools import reduce from token_scanner import gen_token,Token from operator import eq,ge,gt,ne,le,lt, not_,and_,or_,lshift,rshift, add,sub,mod,mul,pow,abs,neg parser = argparse.ArgumentParser() parser.add_argument('-i','--instruction',help="output instructions",action='store_true') parser.add_argument('-s','--stack',help="output data stack when executing each instruction",action='store_true') parser.add_argument('-t','--token',help="output tokens when parsing",action='store_true') parser.add_argument('-v','--varible',help="output varibles for every static environment",action='store_true') parser.add_argument('-f','--file',help="compile and run codes. \n Without this arg, enter interactive REPL",type=str) args = parser.parse_args() FILE = args.file SHOWINS = args.instruction SHOWSTACK = args.stack SHOWVAR = args.varible SHOWTOKEN = args.token WHILE = Token('NAME','while') THEN = Token('NAME','then') ELSE = Token('NAME','else') DO = Token('NAME','do') END = Token('NAME','end') ASSIGN = Token('ASSIGN',':=') EQ = Token('EQ','=') LEFT = Token('LEFT','(') RIGHT = Token('RIGHT',')') COMMA=Token('COMMA',',') SEMICOLON = Token('SEMICOLON',';') PERIOD = Token('PERIOD','.') COLON = Token('COLON',':') class symbol: '''symbols for const, varible, function name''' def __init__(self,name,varType,value=None,level=None,addr = None): self.name = name self.type = varType self.value = value self.level = level self.addr=addr def __str__(self): if self.type=='FUNC': return "({}, {}, {})".format(self.type,self.name,self.addr) elif self.type=='VAR': return "({}, {}={}, {})".format(self.type,self.name,self.value,self.addr) else: return "({}, {}={})".format(self.type,self.name,self.value) def __repr__(self): return "symbol('{}','{}',{},{},{})".format(self.name,self.type,self.value,self.level,self.addr) class stack: '''emulate a stack that with pre-allocated space''' def __init__(self,lst,size=1000): self.lst = lst.copy() self.top=0 self.lst+=[0]*(size-len(lst)) def push(self,val): self.top+=1 if self.top>=len(self.lst): raise Exception('[Error]: data stack overflow') self.lst[self.top]=val def pop(self): self.top -=1 return self.lst[self.top+1] def __setitem__(self,k,val): self.lst[k]=val def __getitem__(self,k): return self.lst[k] def __str__(self): return str(self.lst) def __repr__(self): return 'stack({})'.format(self.lst) class instruction: def __init__(self,name,levelDiff,addr): self.name=name self.levelDiff=levelDiff self.addr=addr def __str__(self): s = self.addr if type(self.addr)==str: s =repr(self.addr) return '{} {} {}'.format(self.name.ljust(4),self.levelDiff,s) class closure: '''environment for every function, including a dict of symbols and pointing to outer environment''' def __init__(self,items=None,outer=None): self.outer =outer if items is None:self.items=dict() else: self.items = items self.varNum=0 def __getitem__(self,key): cur = self while cur is not None: if key in cur.items: return cur.items[key] cur = cur.outer def __setitem__(self,key,val): if key in self.items:raise Exception('[Error]: {} has been defined'.format(key)) if val.type=='VAR': self.varNum+=1 self.items[key] = val def __contains__(self,key): return key in self.items def __iter__(self): return iter(self.items.values()) def __repr__(self): li = [str(i) for i in self.items.values()] return '\n'.join(li) class parser(object): def __init__(self,tokens=None,syms=None,codes=None): self.tokens = [] if tokens is None else tokens self.codes = [] if codes is None else codes self.pointer = 0 self.level = 0 self.ip=0 self.codes=[] self.initSymbol(syms) def initSymbol(self,syms=None): if syms is None: syms=[symbol('E','CONST',e,0),symbol('PI','CONST',pi,0)] self.closure=closure() self.curClosure = self.closure for i in syms: self.addSymbol(i.name,i.type,i.value) def addSymbol(self,var,varType,value=None): sym = symbol(var,varType,value,self.level,self.curClosure.varNum+3) self.curClosure[var]=sym return sym def getSymbol(self,var): sym = self.curClosure[var] if sym is None: self.errorDefine(var) return sym def genIns(self,f,l,a): self.codes.append(instruction(f,l,a)) self.ip+=1 return self.ip-1 def errorInfo(self): '''when parsing codes and encountering error, print whole line in which this error is and print error information ''' def tkstr(tk): if tk.type=='STR':return repr(tk.value) return str(tk.value) tk = self.tokens[self.pointer] a=b = self.pointer lineno = tk.lineNum n = len(self.tokens) while a>=0 and self.tokens[a].lineNum == lineno: a -=1 while b>round(y),'BITAND':lambda x,y:round(x)&round(y), 'BITOR':lambda x,y:round(x)|round(y),'BITNOT':lambda x:~round(x)} self.binaryOPR = dict() self.binaryOPR.update(self.conditionOPR) del self.binaryOPR['NOT'] self.binaryOPR.update(self.arithmeticOPR) self.binaryOPR.update(self.bitOPR) del self.binaryOPR['BITNOT'] self.unaryOPR = {'NEG':neg,'NOT':not_,'BITNOT':lambda x:~round(x),'FAC':lambda x:reduce(mul,range(1,round(x)+1),1),'ODD':lambda x:round(x)%2==1, 'RND':lambda x:randint(0,x),'INT':round}#abs def program(self): self.enableJit = False self.genIns('INT',0,None) self.genIns('JMP',0,None) ip= self.body() self.backpatching(0,self.curClosure.varNum+3) self.backpatching(1,ip) self.match(PERIOD) self.genIns('RET',0,0) def body(self): while 1: if self.isType('CONST') or self.isType('VAR'): tp = self.match().value.upper() while 1: self.wantType('NAME') name = self.match().value val = None if self.isType('EQ'): self.match(EQ) minus = False if self.isType('SUB'): self.match() minus=True self.wantType('NUM') val = float(self.match().value) if minus: val = -val self.addSymbol(name,tp,val) if self.isType('SEMICOLON'): self.match() break self.match(COMMA) elif self.isType('FUNC'): self.match() self.wantType('NAME') name = self.match().value args = self.arg_list() sym = self.addSymbol(name,'FUNC',self.ip) self.level +=1 sym.closure=closure(outer=self.curClosure) self.curClosure = sym.closure beginIp = self.genIns( 'INT',0,None) narg = len(args) sym.argNum = narg ips=[] for arg in args: self.addSymbol(arg,'VAR') ips.append(self.genIns('MOV',None,None)) self.body() nvar = self.curClosure.varNum self.curClosure = self.curClosure.outer span1 = nvar -narg span2 = 3+nvar for i ,ip in enumerate(ips): self.backpatching(ip,span1+i,span2+i) self.match(SEMICOLON) self.backpatching(beginIp,nvar+3) self.level -=1 self.genIns('RET',0,0) else:break ret = self.ip if SHOWVAR: print('level: {}'.format(self.level)) print(self.curClosure) print() for sym in self.curClosure: if sym.type=='VAR' and sym.value is not None: self.genIns('LIT',0,sym.value) self.genIns('STO',0,sym.addr) if not self.isType('PERIOD'): for ip in self.sentence()['RETURN']: self.backpatching(ip,self.ip) return ret def arg_list(self): self.match(LEFT) li = [] if not self.isType('RIGHT'): self.wantType('NAME') li=[self.match().value] while self.isType('COMMA'): self.match() self.wantType('NAME') li.append(self.match().value) self.match(RIGHT) return li def real_arg_list(self): self.match(LEFT) ct=0 if not self.isType('RIGHT'): self.sentenceValue() ct+=1 while self.isType('COMMA'): self.match() self.sentenceValue() ct+=1 self.match(RIGHT) return ct def sentence_list(self,outerLoop=None): ret = self.sentence(outerLoop) while self.isType('SEMICOLON'): self.match() dic=self.sentence(outerLoop) for i in ['BREAK','CONTINUE','RETURN']: ret[i] = ret[i].union(dic[i]) return ret def formatStr(self,s): n = len(s) i = 0 segs = [] last = 0 while i0 and s[i-1]=='\\': segs.append(s[last:i-1]) last=i elif s[i+1] in 'df': segs.append(s[last:i]) segs.append('%{}'.format(s[i+1])) last = i+2 i +=1 i+=1 if last> ') sys.stdout.flush() lines,sys.stdin = getCode(sys.stdin) s = ''.join(lines) tk =[i for i in gen_token(s)] if tk==[]:continue res = cal.parse(tk) if res is not None: print(res) def testFromFile(f): cal = PL0() with open(f,'r') as fp: try: while 1: lines,fp = getCode(fp) if len(lines)==1: print('>>',lines[0].strip('\n\r')) else: print('>> codes: ') for i,l in enumerate(lines): print(str(i+1).ljust(5),l,end='') print() tk =[i for i in gen_token(''.join(lines))] if tk ==[]:continue res = cal.parse(tk) if res is not None: print(res) except EOFError: pass if __name__=='__main__': if FILE: testFromFile(FILE) else: testFromStdIO()