diff --git a/graph/cloneGraph.cpp b/graph/cloneGraph.cpp new file mode 100644 index 0000000..b68a948 --- /dev/null +++ b/graph/cloneGraph.cpp @@ -0,0 +1,30 @@ +class Solution { +public: + map st; + Node *cloneGraph(Node *node){ + Node* ret = new Node(node->val,vector()); + st[node]=ret; + for(auto x:node->neighbors){ + auto p = st.find(x); + if(p==st.end()){ + ret->neighbors.push_back(cloneGraph(x)); + }else ret->neighbors.push_back(p->second); + } + return ret; + } +}; +/* +// Definition for a Node. +class Node { +public: + int val; + vector neighbors; + + Node() {} + + Node(int _val, vector _neighbors) { + val = _val; + neighbors = _neighbors; + } +}; +*/ diff --git a/parser/PL0-compiler/README.md b/parser/PL0-compiler/README.md new file mode 100644 index 0000000..ac873a3 --- /dev/null +++ b/parser/PL0-compiler/README.md @@ -0,0 +1,397 @@ +# PL0-compiler + +[![Stars](https://img.shields.io/github/stars/mbinary/PL0-compiler.svg?label=Stars&style=social)](https://github.com/mbinary/PL0-compiler/stargazers) +[![Forks](https://img.shields.io/github/forks/mbinary/PL0-compiler.svg?label=Fork&style=social)](https://github.com/mbinary/PL0-compiler/network/members) +[![Build](https://travis-ci.org/mbinary/PL0-compiler.svg?branch=master)](https://travis-ci.org/mbinary/PL0-compiler?branch=master) +[![repo-size](https://img.shields.io/github/repo-size/mbinary/PL0-compiler.svg)](.) + +[![License](https://img.shields.io/badge/LICENSE-MIT-blue.svg)](LICENSE) +[![Platform](https://img.shields.io/badge/platform-linux-lightgrey.svg)](.) +[![codecov](https://codecov.io/gh/mbinary/PL0-compiler/branch/master/graph/badge.svg)](https://codecov.io/gh/mbinary/PL0-compiler) +[![codebeat badge](https://codebeat.co/badges/a7af5445-6761-4d2f-b943-c3cb21dcb438)](https://codebeat.co/projects/github-com-mbinary-pl0-compiler-master) + + +> A compiler for c-like programming language **based on** PL0, which is a dynamic, strong typing language. 
+ +See grammar [here](#grammar), [wikipedia-PL0](https://en.wikipedia.org/wiki/PL/0), and download [this pdf(zh)](src/编译原理和技术实践2017.pdf) for more details. + +# QuickStart +```shell +usage: parser.py [-h] [-i] [-s] [-t] [-v] [-f FILE] + +optional arguments: + -h, --help show this help message and exit + -i, --instruction output instructions + -s, --stack output data stack when executing each instruction + -t, --token output tokens when parsing + -v, --varible output varibles for every static environment + -f FILE, --file FILE compile and run codes. Without this arg, enter + interactive REPL +``` + +Run `python parser.py` to enter the REPL, where you can type and run sentences and expressions interactively. + +# Examples +Note that in the REPL, every sentence, expression or block ends with '.'. In a program file, however, only the whole program ends with a dot. +## interactive-expression +There are some expressions and sentences in the file test/expr.txt; run them with: +`python parser.py -f test/expr.txt` + +```c +>> codes: +1 // expression +2 var a=3,b=2,c;. + +>> c:=a+1. +>> begin c; c+1!=1 ; c+1=5 end. +result: 4.0; True; True; +>> for(;b>=0;b:=b-1) print('random(100): %d',random(100)) . +random(100): 14 +random(100): 60 +random(100): 58 +>> begin ++1--1; 1<<2+3%2; 2&1 end. +result: 2.0; 8; 0; +>> -1+2*3/%2. +result: 2.0; +>> (1+2. +line 1: ( 1 + 2 . + ^ +[Error]: Expected ")", got "." +>> 4!!. +result: 620448401733239439360000; +>> codes: +1 if 0 then 1 +2 elif 1>2 then 2 +3 elif false then 3 +4 else 4. + +result: 4.0; +``` +## fibonacci +Run `python parser.py -f test/fibonacci.txt` + +```c +>> codes: +1 func fib(n) +2 begin +3 if n=1 || n=2 then return 1; +4 return fib(n-1)+fib(n-2); +5 end ; +6 var n=1; +7 begin +8 while n<15 do +9 begin +10 print('fib[%d]=%d',n,fib(n)); +11 n :=n+1; +12 end; +13 end +14 . 
+ +fib[1]=1 +fib[2]=1 +fib[3]=2 +fib[4]=3 +fib[5]=5 +fib[6]=8 +fib[7]=13 +fib[8]=21 +fib[9]=34 +fib[10]=55 +fib[11]=89 +fib[12]=144 +fib[13]=233 +fib[14]=377 +``` + +Try the following commands to explore more examples. +```shell +python parser.py -f test/factorial.txt +python parser.py -f test/closure.txt +python parser.py -f test/closure.txt -i +python parser.py -f test/closure.txt -t +python parser.py -f test/closure.txt -s +python parser.py -f test/closure.txt -istv +python parser.py # enter interactive repl +``` +# Description +## ident type +* constant +* varible +* function +## operator +### relation opr +* \< +* \> +* \<= +* \>= +* = equal +* != +* odd +### bit opr +* \& bitand +* \| bitor +* \~ bitnot +* \<\< left shift +* \>\> right shift +### arithmetic opr +* \+ add/plus +* \- sub/minus +* \* multiply +* \/ divide +* \/\% integer div +* % mod +* \^ power +* \! factorial +### conditon opr +* ?: eg a\>b ? c:d +## control structure +* if elif else +* for +* while +* break +* continue +* return + +## builtin function +* print(formatStr,arg1,...) +* random(), random(n) + +# Grammar +```scala +program = body "." 
+body = {varDeclaration ";" | constDeclaration ";" | "func" ident "(" arg_list ")" body ";"} sentence + +varDeclaration = "var" varIdent { "," varIdent} +varIdent = ident ["=" number] | ident { "[" number "]" } +constDeclaration = "const" ident "=" number {"," ident "=" number} + +sentence = [ ident ":=" { ident ":=" } sentenceValue + | "begin" sentence { ";" sentence} "end" + | "if" sentenceValue "then" sentence {"elif" sentence} ["else" sentence] + | "while" sentenceValue "do" sentence + | "do" sentence "while" sentenceValue + | "switch" sentenceValue {"case" sentenceValue {"," sentenceValue} ":" [setenceValue]} (* ["default" ":" sentenceValue] to do *) + | "break" + | "continue" + | ["return"] sentenceValue + | "print" "(" str,real_arg_list ")" ] + +sentenceValue = condition + +arg_list = ident { "," ident} + +real_arg_list = sentenceValue {"," sentenceValue } + + +condition = condition_or [ "?" sentenceValue ":" sentenceValue ] +condition_or = condition_and { "||" condition_or } +condition_and = condition_not { condition_not "&&" condition_and} +condition_not = {"!"} condition_unit +condiiton_unit = ["odd"] expression + | expression ("<" | ">" | "<=" | ">=" | "=" | "!=") expression + +expression = level1 { ("<<"| ">>" | "&" | "|") level1 } +level1 = level2 { ( "+" | "-" ) level2 } +level2 = level3 { "*" | "/" | "/%" | "%" ) level3 } +level3 = level4 {"^" level4} +level4 = item {"!"} (* factorial *) +item = number|"true"|"false" | ident { "(" real_arg_list ")" }| "(" sentenceValue" )" | ("+" | "-" | "~" ) item +``` +## syntax +Writet down syntax, then convert left recursion to right recursion. 
+Namely we should change the following productions: +expr, level0, level, level3 + +We notice that +```scala +A -> Aa|b +``` +is equivalent to +```scala +A -> bR +R -> nil | aR +``` +so here are the right-recursion productions +```scala +expr -> level1 interval1 +interval1 -> nil | {&|'|'|>>|<<|} interval1 + +level1 -> level2 interval2 +interval2 -> nil | {+|-} interval2 + +level2 -> level3 interval3 +interval3 -> nil | {*|/|//|%} interval3 + +level3 -> level4 | level4 ^ level3 + +level4 -> item interval4 +interval4 -> nil |! interval4 + +item -> NUM|E|PI|ln(expr)|(expr)| + item| - item| ~ item +``` + +When implementing the parser, we can use a loop structure to implement the right recursion because it's tail-recursive. + +For instance, we can easily see that the production for `level4` is +```scala +level4 -> item | item ! | item!! |item !!! | ... +``` +Though we can't write a production with infinite loops, we can write it in code like this: +```python +match_level4(): + result = match(item) + while lookAhead matches "!": + match("!") + result = factorial(result) + return result +``` + +# Instruction generation +We designed several instructions that can be generated for the target machine. +To simplify this problem, we will emulate this virtual machine and execute instructions in Python. +## register +This machine has three registers: +* `b` is the base register that contains the base pointer to locate a variable in the data stack +* `regs` are a series of registers. Currently the first one is used for the return value of the latest function call, and the second one is used to store the `switch` value +* `pc` is the pc register that points to the instruction +## stack +There are two stacks in this virtual machine. +One contains the instructions, visited by register `pc`. It won't change when executing instructions, so we can assume it's read-only. +The other is the data stack. It changes dynamically while the program runs. 
+ +For each level, the first place holds the base address of this level. The second place is the static chain used to visit the upper level's variables. The third place contains the return address of the upper level. +The other places in a level contain local variables and real-time data for calculation. +![](src/data_stack.jpg) + +Each time we call a function, the level increases by 1. Likewise, the level decreases by 1 when we return from a function. +## instruction +Every instruction consists of three parts. The first is the name of the instruction. Generally, the second is the level difference of an identifier (if it has one). And the third part is the address. + +name | levelDiff | address | explanation +:-:|:-:|:-:|:-: +INT|0|n|allocate n spaces for one level +INT|1|n| rewind stk.top backward n steps +INT|2|n| print the top n elements of the stack +LIT|-|constant value| push a constant value to the top of the data stack +LOD | levelDiff|addr | load a variable's value to the top of the data stack. The variable is located using levelDiff and addr +STO|levelDiff|addr| store the stack top value to a variable, top decreases. +CAL|levelDiff|addr|call a function +JMP |-|addr|jump to addr, namely set pc to addr +JPC|-|addr| pop stack, if the value is not True, jump to addr +MOV|n1|n2| stk[top-n2] = stk[top-n1] +RET|-|-| return to the upper level, use the current level's first three values to change pc, data stack, base register. +POP|-|-| pop the data stack, store the value in `reg` register +PUSH|-|-| push `reg` to stack top +OPR|-|operator type| various operations on values + +# Design +We can generate instructions while analysing the grammar. +The key points are the instruction translations of the control structures. +## if elif else +![](src/elseif_ins_stack.jpg) +## while/break +![](src/while_ins_stack.jpg) +`continue`, `for` can be translated in the same way. 
+## switch +e.g. +```c +switch n + case 1,2:print('1 or 2') + case 1+5:print('6') + case func_add(1,6):print('7') +; +``` + +## function arguments pass +When analysing a function's definition, we store the formal arguments as the function's local variables. +When we call this function, we calculate the real arguments in the level above the function, and then pass the values to the function's formal variables one by one. + +I use an instruction `MOV` to achieve this goal. `MOV n1, n2` copies the value stk[top-n1] to stk[top-n2]. +Let's have a look at how to call a function and pass the argument values. + +Before we call a function, its real args are calculated in the level above this function. Say the function's level is n+1, and we call it from level n. +In level n, we calculate the function's args; all values are stored in the data stack of level n. Now we call the function and enter it. The data stack reaches level n+1 and grows by three spaces for `DL`,`SL`,`RA`. The following spaces are for the function's local variables. So we can move level n's real arg values to these places according to the function's argument count and variable count. + +For example, if the function has n1 args and n2 local variables (excluding args), then +```python +for i in [0,1..,n1-1]: + mov , n2+n1+3+i, n2 + i +``` +The moment we return to level n, we should rewind top by n1 spaces; `OPR,n1,'BACK'` does this. + +![](src/argument_pass.jpg) + +## function return +Again, mark the function's level as n+1 and the outer (upper) level as n. +To implement the `return` sentence, we just need to do two things: +* calculate the `return` sentence value **in level n+1** +* pass this value to level n +It seems hard to pass level n+1's value to level n: once we return to level n, level n+1's data in the data stack is cleared. + +I use an extra register `reg` to achieve this. 
Before we return, +* calculate the return value +* `OPR ,0,'POP'` will pop the value and store it in reg +* return to level n +* `OPR,0,'PUSH'` will push the reg value to the stack top + +Now the return value has been passed from level n+1 to level n +![](src/return_value.jpg) + +## instruction backpatching +Take the `while` block as an example. We don't know the target address of the `JPC` instruction until we finish analysing the whole block. The solution is that, after analysing the while condition, we generate an instruction with no target address as a placeholder and note down its address. As soon as we finish analysing the whole `while` block, the instruction pointer, namely `ip`, points to the target address of `JPC`. Then we backpatch the `JPC` instruction with that address, i.e. the current value of `ip`. + +## symbol table +When analysing and translating, we want to look up a symbol, which includes its level and address (and value, for a constant), by its name. The following shows how to achieve this elegantly. + +There are three types of symbols: +* constant +* variable +* function name +Every function has an environment that contains this level's symbols, and an outer environment (except the main function). Every environment can hold the three kinds of symbols mentioned above. + +By default, we are in the main function at the beginning of the program. + +In an environment, when we meet a symbol, we look it up in the current environment. If it is not found, we search the outer environments recursively until we find it. + +This guarantees that no two symbols in the same environment share a name, while the same name may appear in different environments. + +So there won't be conflicts when different functions have the same local variables or arguments. + +I create class `closure` to describe this kind of environment and variable `curClosure` to mark the current environment. Every time we call a function, we enter a more inner environment. We do the following things to make sure that the environment changes correctly. 
+```python +saved = curClosure +curClosure = function.closure +call function +curClosure = saved +``` +## builtin function--print +This function is just like the function `printf` in C. +Call it in the following format: +`print(FORMAT[,arg1,arg2...])` +The format string currently supports two kinds of format: +* `%d`: integer +* `%f`: float + +If you want to print a raw `%d` without formatting, add a backslash `\` in front of `%`. (The same applies to `%f`.) + +For example: +```python +>> print('a=%d, % \%d',1) +a=1, % %d +``` + +To implement this builtin function, we first parse the format string. I parse the format string and generate segs separated by %d or %f. +For instance, `'fib[%d]=%d'` generates segs `['fib[','%d',']=','%d']`. +For every seg, if it's a string, generate an instruction `('LIT',0,c)` for each character c of the seg. +If it's `%d` or `%f`, we should first match a comma, and then parse the following value and generate instructions. At runtime, after executing these instructions, we will get a value (which occupies only one data-stack unit). + +After handling all segs, we generate an instruction `('INT',2,n)`, which represents printing the top n units of the data stack, and stk.top = stk.top-n. +n can be calculated by summing the lengths of all string segs and the number of format segs. 
+ + +# To do +- [ ] array +- [ ] different value pass +- [ ] function pass +- [ ] type +- [ ] struct diff --git a/parser/PL0-compiler/parser.py b/parser/PL0-compiler/parser.py new file mode 100644 index 0000000..250356f --- /dev/null +++ b/parser/PL0-compiler/parser.py @@ -0,0 +1,835 @@ +''' +######################################################################### +# File : parser.py +# Author: mbinary +# Mail: zhuheqin1@gmail.com +# Blog: https://mbinary.xyz +# Github: https://github.com/mbinary +# Created Time: 2018-11-04 19:50 +# Description: +######################################################################### +''' +import sys +import argparse +from math import e,pi,log +from random import randint +from functools import reduce +from token_scanner import gen_token,Token +from operator import eq,ge,gt,ne,le,lt, not_,and_,or_,lshift,rshift, add,sub,mod,mul,pow,abs,neg + + +parser = argparse.ArgumentParser() + + +parser.add_argument('-i','--instruction',help="output instructions",action='store_true') +parser.add_argument('-s','--stack',help="output data stack when executing each instruction",action='store_true') +parser.add_argument('-t','--token',help="output tokens when parsing",action='store_true') +parser.add_argument('-v','--varible',help="output varibles for every static environment",action='store_true') +parser.add_argument('-f','--file',help="compile and run codes. 
\n Without this arg, enter interactive REPL",type=str) + +args = parser.parse_args() + +FILE = args.file +SHOWINS = args.instruction +SHOWSTACK = args.stack +SHOWVAR = args.varible +SHOWTOKEN = args.token + + +WHILE = Token('NAME','while') +THEN = Token('NAME','then') +ELSE = Token('NAME','else') +DO = Token('NAME','do') +END = Token('NAME','end') +ASSIGN = Token('ASSIGN',':=') +EQ = Token('EQ','=') +LEFT = Token('LEFT','(') +RIGHT = Token('RIGHT',')') +COMMA=Token('COMMA',',') +SEMICOLON = Token('SEMICOLON',';') +PERIOD = Token('PERIOD','.') +COLON = Token('COLON',':') + +class symbol: + '''symbols for const, varible, function name''' + def __init__(self,name,varType,value=None,level=None,addr = None): + self.name = name + self.type = varType + self.value = value + self.level = level + self.addr=addr + def __str__(self): + if self.type=='FUNC': + return "({}, {}, {})".format(self.type,self.name,self.addr) + elif self.type=='VAR': + return "({}, {}={}, {})".format(self.type,self.name,self.value,self.addr) + else: + return "({}, {}={})".format(self.type,self.name,self.value) + def __repr__(self): + return "symbol('{}','{}',{},{},{})".format(self.name,self.type,self.value,self.level,self.addr) +class stack: + '''emulate a stack that with pre-allocated space''' + def __init__(self,lst,size=1000): + self.lst = lst.copy() + self.top=0 + self.lst+=[0]*(size-len(lst)) + + def push(self,val): + self.top+=1 + if self.top>=len(self.lst): + raise Exception('[Error]: data stack overflow') + self.lst[self.top]=val + def pop(self): + self.top -=1 + return self.lst[self.top+1] + def __setitem__(self,k,val): + self.lst[k]=val + def __getitem__(self,k): + return self.lst[k] + def __str__(self): + return str(self.lst) + def __repr__(self): + return 'stack({})'.format(self.lst) +class instruction: + def __init__(self,name,levelDiff,addr): + self.name=name + self.levelDiff=levelDiff + self.addr=addr + def __str__(self): + s = self.addr + if type(self.addr)==str: + s =repr(self.addr) + 
return '{} {} {}'.format(self.name.ljust(4),self.levelDiff,s) +class closure: + '''environment for every function, including a dict of symbols and pointing to outer environment''' + def __init__(self,items=None,outer=None): + self.outer =outer + if items is None:self.items=dict() + else: self.items = items + self.varNum=0 + def __getitem__(self,key): + cur = self + while cur is not None: + if key in cur.items: + return cur.items[key] + cur = cur.outer + def __setitem__(self,key,val): + if key in self.items:raise Exception('[Error]: {} has been defined'.format(key)) + if val.type=='VAR': + self.varNum+=1 + self.items[key] = val + def __contains__(self,key): + return key in self.items + def __iter__(self): + return iter(self.items.values()) + def __repr__(self): + li = [str(i) for i in self.items.values()] + return '\n'.join(li) + +class parser(object): + def __init__(self,tokens=None,syms=None,codes=None): + self.tokens = [] if tokens is None else tokens + self.codes = [] if codes is None else codes + self.pointer = 0 + self.level = 0 + self.ip=0 + self.codes=[] + self.initSymbol(syms) + def initSymbol(self,syms=None): + if syms is None: syms=[symbol('E','CONST',e,0),symbol('PI','CONST',pi,0)] + self.closure=closure() + self.curClosure = self.closure + for i in syms: + self.addSymbol(i.name,i.type,i.value) + def addSymbol(self,var,varType,value=None): + sym = symbol(var,varType,value,self.level,self.curClosure.varNum+3) + self.curClosure[var]=sym + return sym + def getSymbol(self,var): + sym = self.curClosure[var] + if sym is None: + self.errorDefine(var) + return sym + def genIns(self,f,l,a): + self.codes.append(instruction(f,l,a)) + self.ip+=1 + return self.ip-1 + def errorInfo(self): + '''when parsing codes and encountering error, + print whole line in which this error is + and print error information + ''' + def tkstr(tk): + if tk.type=='STR':return repr(tk.value) + return str(tk.value) + tk = self.tokens[self.pointer] + a=b = self.pointer + lineno = tk.lineNum 
+ n = len(self.tokens) + while a>=0 and self.tokens[a].lineNum == lineno: + a -=1 + while b>round(y),'BITAND':lambda x,y:round(x)&round(y), 'BITOR':lambda x,y:round(x)|round(y),'BITNOT':lambda x:~round(x)} + self.binaryOPR = dict() + self.binaryOPR.update(self.conditionOPR) + del self.binaryOPR['NOT'] + self.binaryOPR.update(self.arithmeticOPR) + self.binaryOPR.update(self.bitOPR) + del self.binaryOPR['BITNOT'] + self.unaryOPR = {'NEG':neg,'NOT':not_,'BITNOT':lambda x:~round(x),'FAC':lambda x:reduce(mul,range(1,round(x)+1),1),'ODD':lambda x:round(x)%2==1, 'RND':lambda x:randint(0,x),'INT':round}#abs + + def program(self): + self.enableJit = False + self.genIns('INT',0,None) + self.genIns('JMP',0,None) + ip= self.body() + self.backpatching(0,self.curClosure.varNum+3) + self.backpatching(1,ip) + self.match(PERIOD) + self.genIns('RET',0,0) + def body(self): + while 1: + if self.isType('CONST') or self.isType('VAR'): + tp = self.match().value.upper() + while 1: + self.wantType('NAME') + name = self.match().value + val = None + if self.isType('EQ'): + self.match(EQ) + minus = False + if self.isType('SUB'): + self.match() + minus=True + self.wantType('NUM') + val = float(self.match().value) + if minus: val = -val + self.addSymbol(name,tp,val) + if self.isType('SEMICOLON'): + self.match() + break + self.match(COMMA) + elif self.isType('FUNC'): + self.match() + self.wantType('NAME') + name = self.match().value + args = self.arg_list() + sym = self.addSymbol(name,'FUNC',self.ip) + self.level +=1 + sym.closure=closure(outer=self.curClosure) + self.curClosure = sym.closure + beginIp = self.genIns( 'INT',0,None) + narg = len(args) + sym.argNum = narg + ips=[] + for arg in args: + self.addSymbol(arg,'VAR') + ips.append(self.genIns('MOV',None,None)) + self.body() + nvar = self.curClosure.varNum + self.curClosure = self.curClosure.outer + span1 = nvar -narg + span2 = 3+nvar + for i ,ip in enumerate(ips): + self.backpatching(ip,span1+i,span2+i) + self.match(SEMICOLON) + 
self.backpatching(beginIp,nvar+3) + self.level -=1 + self.genIns('RET',0,0) + else:break + ret = self.ip + if SHOWVAR: + print('level: {}'.format(self.level)) + print(self.curClosure) + print() + for sym in self.curClosure: + if sym.type=='VAR' and sym.value is not None: + self.genIns('LIT',0,sym.value) + self.genIns('STO',0,sym.addr) + if not self.isType('PERIOD'): + for ip in self.sentence()['RETURN']: + self.backpatching(ip,self.ip) + return ret + def arg_list(self): + self.match(LEFT) + li = [] + if not self.isType('RIGHT'): + self.wantType('NAME') + li=[self.match().value] + while self.isType('COMMA'): + self.match() + self.wantType('NAME') + li.append(self.match().value) + self.match(RIGHT) + return li + def real_arg_list(self): + self.match(LEFT) + ct=0 + if not self.isType('RIGHT'): + self.sentenceValue() + ct+=1 + while self.isType('COMMA'): + self.match() + self.sentenceValue() + ct+=1 + self.match(RIGHT) + return ct + def sentence_list(self,outerLoop=None): + ret = self.sentence(outerLoop) + while self.isType('SEMICOLON'): + self.match() + dic=self.sentence(outerLoop) + for i in ['BREAK','CONTINUE','RETURN']: + ret[i] = ret[i].union(dic[i]) + return ret + def formatStr(self,s): + n = len(s) + i = 0 + segs = [] + last = 0 + while i0 and s[i-1]=='\\': + segs.append(s[last:i-1]) + last=i + elif s[i+1] in 'df': + segs.append(s[last:i]) + segs.append('%{}'.format(s[i+1])) + last = i+2 + i +=1 + i+=1 + if last> ') + sys.stdout.flush() + lines,sys.stdin = getCode(sys.stdin) + s = ''.join(lines) + tk =[i for i in gen_token(s)] + if tk==[]:continue + res = cal.parse(tk) + if res is not None: print(res) +def testFromFile(f): + cal = PL0() + with open(f,'r') as fp: + try: + while 1: + lines,fp = getCode(fp) + if len(lines)==1: print('>>',lines[0].strip('\n\r')) + else: + print('>> codes: ') + for i,l in enumerate(lines): + print(str(i+1).ljust(5),l,end='') + print() + tk =[i for i in gen_token(''.join(lines))] + if tk ==[]:continue + res = cal.parse(tk) + if res is 
not None: print(res) + except EOFError: + pass +if __name__=='__main__': + if FILE: testFromFile(FILE) + else: testFromStdIO() diff --git a/parser/PL0-compiler/requirements.txt b/parser/PL0-compiler/requirements.txt new file mode 100644 index 0000000..e69de29 diff --git a/parser/PL0-compiler/src/Declaration_parser_display.pptx b/parser/PL0-compiler/src/Declaration_parser_display.pptx new file mode 100644 index 0000000..4fabbe7 Binary files /dev/null and b/parser/PL0-compiler/src/Declaration_parser_display.pptx differ diff --git a/parser/PL0-compiler/src/argument_pass.dot b/parser/PL0-compiler/src/argument_pass.dot new file mode 100644 index 0000000..d0cf1eb --- /dev/null +++ b/parser/PL0-compiler/src/argument_pass.dot @@ -0,0 +1,10 @@ +digraph G{ + nodesep=.05; + rankdir=LR; + node [shape=record,width=.1,height=.1]; + node0[label="{SL1|DL1|RA1}|...|val1|val2|{SL2|DL2|RA2}|arg1|arg2"] + node0:a1->node0:v1; + node0:a2->node0:v2; + +} + \ No newline at end of file diff --git a/parser/PL0-compiler/src/argument_pass.jpg b/parser/PL0-compiler/src/argument_pass.jpg new file mode 100644 index 0000000..90b6cfb Binary files /dev/null and b/parser/PL0-compiler/src/argument_pass.jpg differ diff --git a/parser/PL0-compiler/src/data_stack.jpg b/parser/PL0-compiler/src/data_stack.jpg new file mode 100644 index 0000000..148adaf Binary files /dev/null and b/parser/PL0-compiler/src/data_stack.jpg differ diff --git a/parser/PL0-compiler/src/design.md b/parser/PL0-compiler/src/design.md new file mode 100644 index 0000000..2d262d9 --- /dev/null +++ b/parser/PL0-compiler/src/design.md @@ -0,0 +1,162 @@ +# 1. PL0-compiler +> A compiler for c-like programming language **based on** PL0, which is a dynamic, strong typing language. + +>小组成员: 朱河勤, 张世聪, 徐瑞, 詹慧悠 + + +**分工** +* 朱河勤(整体框架设计与实现包括词法分析,语法分析代码生成等) +* 张世聪(do-while、switch语句的实现) +* 詹慧悠(do-while、switch语句的实现及实验报告撰写) +* 徐瑞(实现传值调用) + +# 2. operations and features +![](features.jpg) + +# 3. Grammar +```scala +program = body "." 
+body = {varDeclaration ";" | constDeclaration ";" | "func" ident "(" arg_list ")" body ";"} sentence + +varDeclaration = "var" varIdent { "," varIdent} +varIdent = ident ["=" number] | ident { "[" number "]" } +constDeclaration = "const" ident "=" number {"," ident "=" number} + +sentence = [ ident ":=" { ident ":=" } sentenceValue + | "begin" sentence { ";" sentence} "end" + | "if" sentenceValue "then" sentence {"elif" sentence} ["else" sentence] + | "while" sentenceValue "do" sentence + | "break" + | "continue" + | ["return"] sentenceValue + | "print" "(" real_arg_list ")" ] + +sentenceValue = condition + +arg_list = ident { "," ident} + +real_arg_list = sentenceValue {"," sentenceValue } + + +condition = condition_or [ "?" sentenceValue ":" sentenceValue ] +condition_or = condition_and { "||" condition_or } +condition_and = condition_not { condition_not "&&" condition_and} +condition_not = {"!"} condition_unit +condiiton_unit = ["odd"] expression + | expression ("<" | ">" | "<=" | ">=" | "=" | "!=") expression + +expression = level1 { ("<<"| ">>" | "&" | "|") level1 } +level1 = level2 { ( "+" | "-" ) level2 } +level2 = level3 { "*" | "/" | "/%" | "%" ) level3 } +level3 = level4 {"^" level4} +level4 = item {"!"} (* factorial *) +item = number |ident { "(" real_arg_list ")" }| "(" sentenceValue" )" | ("+" | "-" | "~" ) item +``` + +# 4. Instruction generation +We designed several instructions that can be generated for the target machine. +To simplify this problem, we will emulate this virtual machine and execute instructions in python. +## 4.1. register +This machine has three registers: +* `b` is the base register that contains the base pointer to locate a varible in the data stack +* `reg` is the return register that contains the return value of latest function call +* `pc` is the pc register that points to the instruction +## 4.2. stack +There are two stack in this virtual machine. +One contains the instructions, visited by register `pc`. 
It won't change when executing instructions, so we can assume it's readonly +The other is data stack. It dynamiclly changes when running the program. + +For each level, the first is the base address of this level. The second place is the static chain to visit the upper level's varibles. The third place contains the return address of the upper level. +And the other places in one level contains local varibles and real time data for calculation. +![](src/data_stack.jpg) + +Each time we call a function, the level increases 1. Also, the level decreases 1 when we return from a function. + + +## 4.3. instruction +Every instruction consists of three parts. The first is the name of the instruction. Generally, the second is the level diifference of a identifier(if it has). And the third part is the address. + +name | levelDiff | address | explanation +:-:|:-:|:-:|:-: +INT|-|n|allocate n space for one level +LIT|-|constant value| push a constant value to the top of the data stack +LOD | levelDiff|addr | load a varible value to the top of the data stack. The var can be found use levelDiff and addr +STO|levelDiff|addr| store the stack top value to a varible, top decreases. +CAL|levelDiff|addr|call a function +JMP |-|addr|jmp to addr, namely set addr to pc +JPC|-|addr| pop stack, if the value is not True, jmp addr +MOV|n1|n2| stk[top-n2] = stk[top-n1] +OPR |-| RET| return to the upper level, use current level's first three value to change pc, data stack, base register. +OPR | -|POP| pop the data stack, store the value in `reg` register +OPR|-|PUSH| push `reg` to stack top +OPR|n|BACK| rewind stk.top backward n steps +OPR|-|operator type| variout operation on value + +# 5. Design +We can generate instruction when analysing grammar. +Some keypoints is the control structures' instruction traslation. +## 5.1. if elif else +![](../src/elseif_ins_stack.jpg) +## 5.2. while/break +![](../src/while_ins_stack.jpg) +`continue`, `for` can be translated in the same way. +## 5.3. 
function arguments pass +When analysing the function's definition, we can store the formal arguments as the function's local variables. +As soon as we call this function, we should calculate the real arguments in the level above the function, and then pass the values to the function's formal variables one by one. + +I use an instruction `MOV` to achieve this goal. `MOV n1, n2` will store the value stk[top-n1] in stk[top-n2]. +Let's have a look at how to call a function and pass argument values. + +Before we call a function, its real args will be calculated in the level above this function. Note the function level is n+1, and we call this function in level n. +In level n, we calculate the function's args, and all values are stored in the data stack of level n. Now call the function and enter it. The data stack reaches level n+1 and grows three spaces for `DL`,`SL`,`RA`. The following spaces are for the function's local variables. So we can mov level n's real args values to these places according to the function's argument num and variable num. + +For example, if a function has n1 args and n2 local variables (excluding args), then +```python +for i in [0,1..,n1-1]: + mov , n2+n1+3+i, n2 + i +``` +The moment we return to level n, we should rewind top by n1 spaces; `OPR,n1,'BACK'` can make it. + +![](../src/argument_pass.jpg) + +## 5.4. function return +Also, mark the function level as n+1, and the outer (upper) level is level n. +To implement the `return` sentence, we just need to do two things: +* calculate the `return` sentence value **in level n+1** +* pass this value to level n +It seems that it's hard to pass level n+1 's value to level n. Once we return to level n, level n+1 's data in the data stack will be cleared. + +I use an extra register `reg` to achieve this. Before we return, +* calculate return value +* `OPR ,0,'POP'` will pop the value and store it in reg +* return to level n +* `OPR,0,'PUSH'` will push reg value to stack top + +Now the return value has been passed from level n+1 to level n + +## 5.5. 
instruction fillback +Taking a `while` block as an example, note that we don't know the `JPC` instruction's target addr until we finish analysing the whole block. The solution is that after we analyse the while condition, we generate an instruction with no target address, just to hold its place. We note down this instruction's address. As soon as we finish analysing the whole `while` block, the instruction pointer, namely `ip`, points to the target address of `JPC`. Then we fill back the `JPC` instruction with the target address according to ip. + +## 5.6. symbol table +When analysing and translating, we want to get the symbol, which includes level, address (and value for a constant), according to its name. The following shows how to achieve it elegantly. + +There are three types of symbols: +* constant +* variable +* function name +Every function has an environment that contains this level's symbols, and an outer environment (except the main function). Every environment has the three kinds of symbols mentioned above. + +By default, we are in the main function at the beginning of this program. + +In an environment, when we meet a symbol, we should seek it in the current environment. If not found, go for the outer environment recursively until we find it. + +It guarantees that every environment has no same names for different symbols but may have same names in different environments. + +So there won't be conflicts when different functions have the same local variables or arguments. + +I create class `closure` to describe this kind of environment and variable `curClosure` to mark down the current environment. Every time when calling a function, we enter a more inner environment. We do the following things to make sure that the environment changes correctly. 
+```python +saved = curClosure +curClosure = function.closure +call function +curClosure = saved diff --git a/parser/PL0-compiler/src/design.pdf b/parser/PL0-compiler/src/design.pdf new file mode 100644 index 0000000..2b7d61a Binary files /dev/null and b/parser/PL0-compiler/src/design.pdf differ diff --git a/parser/PL0-compiler/src/display.pptx b/parser/PL0-compiler/src/display.pptx new file mode 100644 index 0000000..92cd81e Binary files /dev/null and b/parser/PL0-compiler/src/display.pptx differ diff --git a/parser/PL0-compiler/src/elseif_ins_stack.dot b/parser/PL0-compiler/src/elseif_ins_stack.dot new file mode 100644 index 0000000..ab5593f --- /dev/null +++ b/parser/PL0-compiler/src/elseif_ins_stack.dot @@ -0,0 +1,19 @@ +digraph G { + compound =true + nodesep=.05; + rankdir=LR; + node [shape=record,width=.1,height=.1]; + subgraph cluster_stack{ + label = "instructions stack" + node0 [label = "cond1 | JPC|expr1 | JMP|cond2 | JPC|expr2 | JMP| expr3| ...",height=2.5]; + node0:f1 -> node0:f4; + node0:f3 -> node0:f9; + node0:f5 -> node0:f8; + node0:f7 -> node0:f9; + } + subgraph cluster_elseif{ + label = "program sentences" + node1 [label = "if cond1 |then expr1|elseif cond2 | then expr2|else expr3| ...",height=2.5]; + } + node1 -> node0 [color ="white" ,ltail=cluster_stack, lhead=cluster_elseif]; +} \ No newline at end of file diff --git a/parser/PL0-compiler/src/elseif_ins_stack.jpg b/parser/PL0-compiler/src/elseif_ins_stack.jpg new file mode 100644 index 0000000..5cc830d Binary files /dev/null and b/parser/PL0-compiler/src/elseif_ins_stack.jpg differ diff --git a/parser/PL0-compiler/src/return_value.dot b/parser/PL0-compiler/src/return_value.dot new file mode 100644 index 0000000..dfe3a49 --- /dev/null +++ b/parser/PL0-compiler/src/return_value.dot @@ -0,0 +1,26 @@ +digraph G{ + nodesep=.05; + rankdir=LR; + node [shape=record,width=.1,height=.1]; + subgraph cluster_1{ + node1[label="{SL|DL|RA}|...|{SL2|DL2|RA2}|...|ret-val"] + "reg: "[shape=ellipse] + } + 
subgraph cluster_2{ + node2[label="{SL|DL|RA}|...|{SL2|DL2|RA2}|..."] + "reg: ret-val"[shape=ellipse] + } + subgraph cluster_3{ + node3[label="{SL|DL|RA}|..."] + "reg: ret-val "[shape=ellipse] + } + subgraph cluster_4{ + node4[label="{SL|DL|RA}|...|ret-val"] + "reg: ret-val "[shape=ellipse] + } + node1 -> node2 [color="red",label="POP",ltail=cluster_1, lhead=cluster_2]; + node2 -> node3 [color="red",label="RET",ltail=cluster_2, lhead=cluster_3]; + node3 -> node4 [color="red",label="PUSH",ltail=cluster_3, lhead=cluster_4]; + +} + \ No newline at end of file diff --git a/parser/PL0-compiler/src/return_value.jpg b/parser/PL0-compiler/src/return_value.jpg new file mode 100644 index 0000000..3a1acab Binary files /dev/null and b/parser/PL0-compiler/src/return_value.jpg differ diff --git a/parser/PL0-compiler/src/while_ins_stack.dot b/parser/PL0-compiler/src/while_ins_stack.dot new file mode 100644 index 0000000..df204f5 --- /dev/null +++ b/parser/PL0-compiler/src/while_ins_stack.dot @@ -0,0 +1,17 @@ +digraph G { + rankdir=LR; + node [shape=record,width=.1,height=.1]; + subgraph cluster_elseif{ + label = "program sentences" + node1 [label = "while cond |...|break | ...|outer while",height=2.5]; + } + subgraph cluster_stack{ + label = "instructions stack" + node0 [label = "cond | JPC|... 
| JMP|...| JMP| outer while",height=2.5]; + node0:f1 -> node0:f6; + node0:f3 -> node0:f6; + node0:f5 -> node0:f0; + } + + node1:f2->node0:f3 [color=red] +} \ No newline at end of file diff --git a/parser/PL0-compiler/src/while_ins_stack.jpg b/parser/PL0-compiler/src/while_ins_stack.jpg new file mode 100644 index 0000000..aa4bf28 Binary files /dev/null and b/parser/PL0-compiler/src/while_ins_stack.jpg differ diff --git a/parser/PL0-compiler/src/编译原理—pl0实验报告.doc b/parser/PL0-compiler/src/编译原理—pl0实验报告.doc new file mode 100644 index 0000000..3d1378e Binary files /dev/null and b/parser/PL0-compiler/src/编译原理—pl0实验报告.doc differ diff --git a/parser/PL0-compiler/src/编译原理—pl0实验报告.pdf b/parser/PL0-compiler/src/编译原理—pl0实验报告.pdf new file mode 100644 index 0000000..c113f39 Binary files /dev/null and b/parser/PL0-compiler/src/编译原理—pl0实验报告.pdf differ diff --git a/parser/PL0-compiler/src/编译原理和技术实践2017.pdf b/parser/PL0-compiler/src/编译原理和技术实践2017.pdf new file mode 100644 index 0000000..c1adecc Binary files /dev/null and b/parser/PL0-compiler/src/编译原理和技术实践2017.pdf differ diff --git a/parser/PL0-compiler/test/test_token_scanner.py b/parser/PL0-compiler/test/test_token_scanner.py new file mode 100644 index 0000000..71875c0 --- /dev/null +++ b/parser/PL0-compiler/test/test_token_scanner.py @@ -0,0 +1,11 @@ +import unittest +from token_scanner import gen_token + +class TestTokenScanner(unittest.TestCase): + def Test_gen_token(self): + li = [i for i in gen_token('int a;')] + ans = [Token('NAME','int',1),Token('NAME','a',1),Token('SEMICOLON',';',1)] + self.assertEqual(li,ans) + +if __name__=='__main__': + unittest.main() diff --git a/parser/PL0-compiler/test/txt/bug.txt b/parser/PL0-compiler/test/txt/bug.txt new file mode 100644 index 0000000..ce95888 --- /dev/null +++ b/parser/PL0-compiler/test/txt/bug.txt @@ -0,0 +1,16 @@ +func fib(n) +begin + if n=1 || n=2 return 1; + return fib(n-1) +fib(n-2); +end ; + +var n=1; +begin + while n<15 do + begin + print ('The ',n,'th fib item is:',fib(n)); 
+ n :=n+1; + end; + +end +. diff --git a/parser/PL0-compiler/test/txt/closure.txt b/parser/PL0-compiler/test/txt/closure.txt new file mode 100644 index 0000000..5b6ee48 --- /dev/null +++ b/parser/PL0-compiler/test/txt/closure.txt @@ -0,0 +1,13 @@ +var a =1; +func foo(a) + print("[in function foo ] a=%d",a); +func bar(a) + begin + print("[in function bar ] a=%d",a); + foo(4); + end; +begin + print("[in function main] a=%d",a); + foo(2); + bar(3) +end. diff --git a/parser/PL0-compiler/test/txt/dowhile.txt b/parser/PL0-compiler/test/txt/dowhile.txt new file mode 100644 index 0000000..7c9162f --- /dev/null +++ b/parser/PL0-compiler/test/txt/dowhile.txt @@ -0,0 +1,11 @@ +var n=1; +begin + do + begin + print('%d',n); + n:=n+1; + end + while n<30; + print('zhqnb') +end +. diff --git a/parser/PL0-compiler/test/txt/expr.txt b/parser/PL0-compiler/test/txt/expr.txt new file mode 100644 index 0000000..26af21e --- /dev/null +++ b/parser/PL0-compiler/test/txt/expr.txt @@ -0,0 +1,17 @@ + +// expression +var a=3,b=2,c;. + c:=a+1. +begin c; c+1!=1 ; c+1=5 end. +for(;b>=0;b:=b-1) print('random(100): %d',random(100)) . +begin ++1--1; 1<<2+3%2; 2&1 end. + -1+2*3/%2. + (1+2. +4!!. + +if 0 then 1 +elif 1>2 then 2 +elif false then 3 +else 4. + + diff --git a/parser/PL0-compiler/test/txt/factorial.txt b/parser/PL0-compiler/test/txt/factorial.txt new file mode 100644 index 0000000..1dc0a0a --- /dev/null +++ b/parser/PL0-compiler/test/txt/factorial.txt @@ -0,0 +1,12 @@ +func f(n) +begin + if n=1 then return 1; + return n*f(n-1); +end; + +var a; +begin + a:=f(10); + print('factorial 10 is %d',a); +end +. 
diff --git a/parser/PL0-compiler/test/txt/fibonacci.txt b/parser/PL0-compiler/test/txt/fibonacci.txt new file mode 100644 index 0000000..e743438 --- /dev/null +++ b/parser/PL0-compiler/test/txt/fibonacci.txt @@ -0,0 +1,16 @@ +func fib(n) +begin + if n=1 || n=2 then return 1; + return fib(n-1)+fib(n-2); +end ; + +var n=1; +begin + while n<15 do + begin + print('fib[%d]=%d',n,fib(n)); + n :=n+1; + end; + +end +. diff --git a/parser/PL0-compiler/test/txt/gcd.txt b/parser/PL0-compiler/test/txt/gcd.txt new file mode 100644 index 0000000..aaa7aae --- /dev/null +++ b/parser/PL0-compiler/test/txt/gcd.txt @@ -0,0 +1,38 @@ + +// a program +const n=3; +var r,x,n16; +func multiply(a,b) +var c,d; +begin + c:=0; + while 1 do + begin + if b<=0 then break; + if odd b then c:= c+a; + a:=2 * a; b:=b/%2; + end; + return c; +end; + +// comment here +func gcd(f,g) +begin + for(;f!=g;) + begin + if f5 ? 2^4:1 ; + x:=multiply(4,n); + r:=gcd(multiply(4,n),multiply(1,n16)); ; ; + print('r=%d,x=%d,n16=%d ',r,x,n16) +end. diff --git a/parser/PL0-compiler/test/txt/switch.txt b/parser/PL0-compiler/test/txt/switch.txt new file mode 100644 index 0000000..9678a82 --- /dev/null +++ b/parser/PL0-compiler/test/txt/switch.txt @@ -0,0 +1,10 @@ +func f(n) + print('squre of %d is %d',n,n*n); +var n=-2; +while n<3 do begin + switch n + case 0:f(n) + case 1,-1:f(n) + case 2,0-2:f(n) ; + n:=n+1; +end. 
diff --git a/parser/PL0-compiler/token_scanner.py b/parser/PL0-compiler/token_scanner.py new file mode 100644 index 0000000..4dbd684 --- /dev/null +++ b/parser/PL0-compiler/token_scanner.py @@ -0,0 +1,108 @@ +''' +######################################################################### +# File : token_scanner.py +# Author: mbinary +# Mail: zhuheqin1@gmail.com +# Blog: https://mbinary.xyz +# Github: https://github.com/mbinary +# Created Time: 2018-09-17 22:20 +# Description: +######################################################################### +''' + +import re +STR = r'[\'\"](?P.*?)[\'\"]' # not completely correct yet +NAME = r'(?P[a-zA-Z_][a-zA-Z_0-9]*)' +NUM = r'(?P\d*\.\d+|\d+)' # note that don't use \d+|\d*\.\d+ + +ASSIGN = r'(?P\:\=)' + +# ODD = r'(?Podd )' +EQ = r'(?P=)' +NEQ = r'(?P!=)' +GT = r'(?P\>)' +LT = r'(?P\<)' +GE = r'(?P\>\=)' +LE = r'(?P\<\=)' + +BITNOT = r'(?P\~)' +BITOR = r'(?P\|)' +BITAND = r'(?P\&)' +RSHIFT = r'(?P\>\>)' +LSHIFT = r'(?P\<\<)' + +AND = r'(?P\&\&)' +NOT = r'(?P\!)' +OR = r'(?P\|\|)' + +ADD = r'(?P\+)' +SUB=r'(?P\-)' + +MUL = r'(?P\*)' +INTDIV = r'(?P\/\%)' +MOD = r'(?P\%)' +DIV = r'(?P
\/)' + +POW = r'(?P\^)' +FAC=r'(?P\!)' #factorial + +COLON = r'(?P\:)' +COMMA = r'(?P\,)' +SEMICOLON = r'(?P\;)' +PERIOD = r'(?P\.)' +QUESTION = r'(?P\?)' +LEFT=r'(?P\()' +RIGHT=r'(?P\))' +WS = r'(?P\s+)' + + +COMMENT = r'(?P//[^\r\n]*|/\*.*?\*/)' + # note that lt,gt should be after le,ge and rshift, lshift +li = [STR,NUM, AND,OR,BITAND,BITOR,BITNOT,RSHIFT,LSHIFT, + EQ,NEQ,GE,LE,LT,GT,\ + SUB,MOD, ADD, MUL,INTDIV,DIV, POW,FAC,NOT,\ + COMMA,SEMICOLON,PERIOD, QUESTION,WS,LEFT,RIGHT,\ + ASSIGN,COLON,NAME] # COLON behind ASSIGN +master_pat = re.compile('|'.join(li),re.DOTALL) + +class Token: + def __init__(self,tp,value,lineNum=None): + self.type = tp + self.value= value + self.lineNum = lineNum + def __eq__(self,tk): + return self.type==tk.type and self.value==tk.value + def __repr__(self): + s = self.value if self.type!='STR' else '"{}"'.format(repr(self.value)) + return '({},{},{})'.format(self.type,s,self.lineNum) + +def gen_token(text): + li = text .split('\n') + beginComment=False + for i,line in enumerate(li): + s = line.lstrip() + if beginComment: + p = s.find('*/') + if p!=-1: beginComment=False + if p!=-1 and p+2> ') + for i in gen_token(expr): + print(i) diff --git a/parser/calculator/calculator.hs b/parser/calculator/calculator.hs new file mode 100644 index 0000000..357ea6b --- /dev/null +++ b/parser/calculator/calculator.hs @@ -0,0 +1,30 @@ +module Calculator where + +-- calculator, integers, operators: +-*/ +-- "2 / 2 + 3 * 4 - 13" == 0 +-- "4 + 3 * 4 / 3 - 6 / 3 * 3 + 8" == 10 +-- +-- expr -> factor | expr {+|-} factor +-- factor -> num | factor {*|/} num + +evaluate :: String -> Double +evaluate s = expr.factor.getNum.filter (\x->x/=' ') $s + + +getNum "" = (0,"") +getNum s = let n = length.takeWhile (\x->'0' <=x && x<='9') $s + (num,res) = splitAt n s + x = read num::Double + in (x,res) + +factor (x,s) = if s=="" || s!!0 =='+' || s!!0 =='-' then (x,s) + else let op = head s + (y,s2) = getNum $tail s + z = if op=='*' then x*y else x/y + in factor (z,s2) 
+ +expr (x,s) = if s=="" then x + else let op = head s + (y,s2) = factor.getNum.tail $s + z = if op=='+' then x+y else x-y + in expr (z,s2) diff --git a/parser/calculator/genExpr.py b/parser/calculator/genExpr.py new file mode 100644 index 0000000..05365d8 --- /dev/null +++ b/parser/calculator/genExpr.py @@ -0,0 +1,32 @@ +from random import randint + + +def genOp(li): + return li[randint(0, len(li)-1)] + + +def genNum(n=20): + return randint(1, n) + + +def genFactor(n=3): + n = randint(1, n) + ret = [str(genNum())] + for i in range(n): + ret.append(genOp('*/')) + ret.append(str(genNum())) + return ''.join(ret) + + +def genExpr(n=8): + n = randint(3, n) + ret = [genFactor()] + for i in range(n): + ret.append(genOp('+-')) + ret.append(genFactor()) + return ' '.join(ret) + + +if __name__ == '__main__': + s = genExpr() + print('evaluate "{}" == {}'.format(s, eval(s))) diff --git a/parser/declarationParser/README.md b/parser/declarationParser/README.md new file mode 100644 index 0000000..44fe702 --- /dev/null +++ b/parser/declarationParser/README.md @@ -0,0 +1,98 @@ +# C-parser +>A token\_scanner and declaration parser for simplified c using LL(1) +# Rules +* size of int or pointer is 1byte +# Grammar +```scala +translation_unit + : declaration + | translation_unit declaration + ; + +declaration + : declaration_specifiers init_declarator_list ';' + ; + +declaration_specifiers + : type_specifier + ; + +init_declarator_list + : init_declarator + | init_declarator_list ',' init_declarator + ; + +init_declarator + : declarator + ; + +type_specifier + : VOID + | INT + ; + +declarator + : pointer direct_declarator + | direct_declarator + ; + +direct_declarator + : IDENTIFIER + | '(' declarator ')' + | direct_declarator '[' CONSTANT_INT ']' + | direct_declarator '(' parameter_type_list ')' + | direct_declarator '(' ')' + ; + pointer + : '*' + | '*' pointer + ; + +parameter_type_list + : parameter_list + ; + +parameter_list + : parameter_declaration + | parameter_list ',' 
parameter_declaration + ; + +parameter_declaration + : declaration_specifiers declarator + ; +``` +# Examples +```c +>> int *p,q,j[2]; +p::pointer(int) +q::int +j::array(2,int) + +>> int *p[2][3]; +p::array(2,array(3,pointer(int))) + +>> int (*p[4])[2]; +p::array(4,pointer(array(2,int))) + +>> int (*f(int i,void *j))[2]; +f::function( i::int X j::pointer(void) => pointer(array(2,int))) + +>> int f(void i, void j, int p[2]); +f::function( i::void X j::void X p::array(2,int) => int) + +>> int *f(int i)[2]; +[Error]: Array of Functions is not allowed + +>> int f[2](int k); +[Error]: Array of Function can not be returned from functions + +>> void (*(*paa)[10])(int a); +paa::pointer(array(10,pointer(function( a::int => void)))) + +>> int (*(*(*pg())(int x))[20])(int *y); +pg::function( void => pointer(function( x::int => pointer(array(20,pointer(function( y::pointer(int) => int))))))) + +>> int (*p(int * s,int (*t)(int *m, int n, int (*l())[20]),int k[10]))[10][20]; +p::function( s::pointer(int) X t::pointer(function( m::pointer(int) X n::int X l::function( void => pointer(array(20,int))) => int)) X k::array(10,int) => pointer(array(10,array(20,int)))) + +``` diff --git a/parser/declarationParser/declarationParser.py b/parser/declarationParser/declarationParser.py new file mode 100644 index 0000000..4de86b1 --- /dev/null +++ b/parser/declarationParser/declarationParser.py @@ -0,0 +1,155 @@ +''' +######################################################################### +# File : parser.py +# Author: mbinary +# Mail: zhuheqin1@gmail.com +# Blog: https://mbinary.xyz +# Github: https://github.com/mbinary +# Created Time: 2018-09-17 22:19 +# Description: +######################################################################### +''' +import argparse +from token_scanner import gen_token + +argp = argparse.ArgumentParser() + +argp.add_argument('-c','--code',help="output generated code", action='store_true') +args = argp.parse_args() +CODE = args.code + +class parser(object): + 
def __init__(self): + self.tokens =None + self.n = 0 + self.i= 0 + def isType(self,s): + return self.i=self.n + def match(self,tp=None): + cur = self.tokens[self.i] + #print(self.i,tp,cur) + if tp is None or cur.type==tp: + self.i+=1 + return cur + raise Exception('[parse error] Expect {}, got {}'.format(tp,cur.type)) + def parse(self,tokens): + self.tokens=tokens + self.i = 0 + self.n = len(self.tokens) + try: + self.statement() + if self.i=0 + tp = 'pointer(' *np + symType + ')'*np + return self.direct_declarator(tp) + def direct_declarator(self,tp): + args ='' + inner = '$' + name = '' + if self.isType('NAME'): + name = self.match().value+'::' + elif self.isType('LEFT'): # ( + self.match() + inner = self.declarator('$') #mark + self.match('RIGHT') + if self.isType('LEFT'): + self.match() + li = ['void'] + if not self.isType('RIGHT'): + li = self.parameter_type_list() + self.match('RIGHT') + if self.isType('L2'): + raise Exception('[Error]: Array of Functions is not allowed') + args = ' X '.join(li) + elif self.isType('L2'): + li = [] + while self.isType('L2'): + self.match() + assert self.isType('NUM') + li.append(int(self.match().value)) + self.match('R2') + if self.isType('LEFT'): + raise Exception('[Error]: Array of Function can not be returned from functions') + for i in reversed(li): + tp = 'array({},{})'.format(i,tp) + if args!='': + tp = 'function( {args} => {tp})'.format(args=args,tp =tp ) + return name+inner.replace('$',tp) + + def pointer(self): + n = 0 + while self.isType('POINTER'): + n+=1 + self.match('POINTER') + return n + def parameter_type_list(self): + return self.parameter_list() + def parameter_list(self): + li = [] + while 1: + argType = self.parameter_declaration() + li.append(argType) + if self.isType('COMMA'): + self.match() + else:break + return li + def parameter_declaration(self): + symType = self.declaration_specifiers() + return self.declarator(symType) + +def testFromStdIO(): + dp = declarationParser() + while 1: + s = input('>> ') 
+ tk = [i for i in gen_token(s)] + dp.parse(tk) +def testFromFile(f= 'test.txt'): + dp = declarationParser() + with open(f,'r') as fp: + for line in fp: + line = line.strip(' \n') + if line.startswith('//') or line=='' :continue + print('>>',line) + tk =[i for i in gen_token(line)] + dp.parse(tk) + print() + +if __name__=='__main__': + testFromFile() + testFromStdIO() diff --git a/parser/declarationParser/display.pptx b/parser/declarationParser/display.pptx new file mode 100644 index 0000000..2780d30 Binary files /dev/null and b/parser/declarationParser/display.pptx differ diff --git a/parser/declarationParser/result.jpg b/parser/declarationParser/result.jpg new file mode 100644 index 0000000..2a2de68 Binary files /dev/null and b/parser/declarationParser/result.jpg differ diff --git a/parser/declarationParser/test.txt b/parser/declarationParser/test.txt new file mode 100644 index 0000000..34bc4f8 --- /dev/null +++ b/parser/declarationParser/test.txt @@ -0,0 +1,11 @@ +int *p,q,j[2]; +int *p[2][3]; +int (*p[4])[2]; +int (*f(int i,void *j))[2]; +int f(void i, void j, int p[2]); +//wrong +int *f(int i)[2]; +int f[2](int k); +void (*(*paa)[10])(int a); +int (*(*(*pg())(int x))[20])(int *y); +int (*p(int * s,int (*t)(int *m, int n, int (*l())[20]),int k[10]))[10][20]; diff --git a/parser/declarationParser/token_scanner.py b/parser/declarationParser/token_scanner.py new file mode 100644 index 0000000..ef6d2c1 --- /dev/null +++ b/parser/declarationParser/token_scanner.py @@ -0,0 +1,47 @@ +''' +######################################################################### +# File : token_scanner.py +# Author: mbinary +# Mail: zhuheqin1@gmail.com +# Blog: https://mbinary.xyz +# Github: https://github.com/mbinary +# Created Time: 2018-11-01 12:58 +# Description: +######################################################################### +''' +import re +NAME = r'(?P[a-zA-Z_][a-zA-Z_0-9]*)' +NUM = r'(?P\d*\.\d+|\d+)' # note that don't use \d+|\d*\.\d+ + +POINTER = r'(?P\*)' +COMMA = 
r'(?P\,)' +SEMICOLON = r'(?P\;)' + +VOID=r'(?Pvoid)' +INT = r'(?Pint)' +LEFT=r'(?P\()' +RIGHT=r'(?P\))' +L2 = r'(?P\[)' +R2 = r'(?P\])' +WS = r'(?P\s+)' + +COMMENT = r'(?P//[^\r\n]*|/\*.*?\*/)' +master_pat = re.compile('|'.join([LEFT,RIGHT,L2,R2,POINTER,COMMA,SEMICOLON,INT,VOID,NUM, WS,NAME]),re.DOTALL) + +class Token: + def __init__(self,tp,val): + self.type = tp + self.value = val + def __repr__(self): + return '({},"{}")'.format(self.type,self.value) +def gen_token(text): + scanner = master_pat.scanner(text) + for m in iter(scanner.match,None): + if m.lastgroup!='WS': + yield Token(m.lastgroup,m.group()) +if __name__ =='__main__': + while 1: + expr = input('>> ') + for i in gen_token(expr): + print(i) + diff --git a/parser/declarationParser/实验报告.pdf b/parser/declarationParser/实验报告.pdf new file mode 100644 index 0000000..3b2dc47 Binary files /dev/null and b/parser/declarationParser/实验报告.pdf differ diff --git a/parser/declarationParser/第八组-编译原理实验.zip b/parser/declarationParser/第八组-编译原理实验.zip new file mode 100644 index 0000000..a9c32c5 Binary files /dev/null and b/parser/declarationParser/第八组-编译原理实验.zip differ