Add parser, graph codes

master
mbinary 2019-03-15 16:37:02 +08:00
parent 26db38a42e
commit e9a10d09d9
40 changed files with 2121 additions and 0 deletions

30
graph/cloneGraph.cpp Normal file
View File

@ -0,0 +1,30 @@
class Solution {
public:
map<Node*,Node*> st;
Node *cloneGraph(Node *node){
Node* ret = new Node(node->val,vector<Node*>());
st[node]=ret;
for(auto x:node->neighbors){
auto p = st.find(x);
if(p==st.end()){
ret->neighbors.push_back(cloneGraph(x));
}else ret->neighbors.push_back(p->second);
}
return ret;
}
};
/*
// Definition for a Node.
class Node {
public:
int val;
vector<Node*> neighbors;
Node() {}
Node(int _val, vector<Node*> _neighbors) {
val = _val;
neighbors = _neighbors;
}
};
*/

View File

@ -0,0 +1,397 @@
# PL0-compiler
[![Stars](https://img.shields.io/github/stars/mbinary/PL0-compiler.svg?label=Stars&style=social)](https://github.com/mbinary/PL0-compiler/stargazers)
[![Forks](https://img.shields.io/github/forks/mbinary/PL0-compiler.svg?label=Fork&style=social)](https://github.com/mbinary/PL0-compiler/network/members)
[![Build](https://travis-ci.org/mbinary/PL0-compiler.svg?branch=master)](https://travis-ci.org/mbinary/PL0-compiler?branch=master)
[![repo-size](https://img.shields.io/github/repo-size/mbinary/PL0-compiler.svg)](.)
<!-- [![License](https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png)](http://creativecommons.org/licenses/by-nc-sa/4.0/) copy LICENCE -->
[![License](https://img.shields.io/badge/LICENSE-MIT-blue.svg)](LICENSE)
[![Platform](https://img.shields.io/badge/platform-linux-lightgrey.svg)](.)
[![codecov](https://codecov.io/gh/mbinary/PL0-compiler/branch/master/graph/badge.svg)](https://codecov.io/gh/mbinary/PL0-compiler)
[![codebeat badge](https://codebeat.co/badges/a7af5445-6761-4d2f-b943-c3cb21dcb438)](https://codebeat.co/projects/github-com-mbinary-pl0-compiler-master)
> A compiler for a C-like programming language **based on** PL0. The language is dynamically and strongly typed.
See grammar [here](#grammar), [wikipedia-PL0](https://en.wikipedia.org/wiki/PL/0), and download [this pdf(zh)](src/编译原理和技术实践2017.pdf) for more details.
# QuickStart
```shell
usage: parser.py [-h] [-i] [-s] [-t] [-v] [-f FILE]
optional arguments:
-h, --help show this help message and exit
-i, --instruction output instructions
-s, --stack output data stack when executing each instruction
-t, --token output tokens when parsing
-v, --varible output varibles for every static environment
-f FILE, --file FILE compile and run codes. Without this arg, enter
interactive REPL
```
Run `python parser.py` to enter the REPL, where you can type and run sentences and expressions interactively.
# Examples
Note that in the REPL every sentence, expression or block ends with '.'. In program code, only the whole program ends with a dot.
## interactive-expression
There are some expressions and sentences in the file `test/expr.txt`; let's run it.
`python parser.py -f test/expr.txt`
```c
>> codes:
1 // expression
2 var a=3,b=2,c;.
>> c:=a+1.
>> begin c; c+1!=1 ; c+1=5 end.
result: 4.0; True; True;
>> for(;b>=0;b:=b-1) print('random(100): %d',random(100)) .
random(100): 14
random(100): 60
random(100): 58
>> begin ++1--1; 1<<2+3%2; 2&1 end.
result: 2.0; 8; 0;
>> -1+2*3/%2.
result: 2.0;
>> (1+2.
line 1: ( 1 + 2 .
^
[Error]: Expected ")", got "."
>> 4!!.
result: 620448401733239439360000;
>> codes:
1 if 0 then 1
2 elif 1>2 then 2
3 elif false then 3
4 else 4.
result: 4.0;
```
## fibonacci
Run `python parser.py -f test/fibonacci.txt`
```c
>> codes:
1 func fib(n)
2 begin
3 if n=1 || n=2 then return 1;
4 return fib(n-1)+fib(n-2);
5 end ;
6 var n=1;
7 begin
8 while n<15 do
9 begin
10 print('fib[%d]=%d',n,fib(n));
11 n :=n+1;
12 end;
13 end
14 .
fib[1]=1
fib[2]=1
fib[3]=2
fib[4]=3
fib[5]=5
fib[6]=8
fib[7]=13
fib[8]=21
fib[9]=34
fib[10]=55
fib[11]=89
fib[12]=144
fib[13]=233
fib[14]=377
```
Try the following commands to explore more examples.
```shell
python parser.py -f test/factorial.txt
python parser.py -f test/closure.txt
python parser.py -f test/closure.txt -i
python parser.py -f test/closure.txt -t
python parser.py -f test/closure.txt -s
python parser.py -f test/closure.txt -istv
python parser.py # enter interactive repl
```
# Description
## ident type
* constant
* variable
* function
## operator
### relation opr
* \<
* \>
* \<=
* \>=
* = equal
* !=
* odd
### bit opr
* \& bitand
* \| bitor
* \~ bitnot
* \<\< left shift
* \>\> right shift
### arithmetic opr
* \+ add/plus
* \- sub/minus
* \* multiply
* \/ divide
* \/\% integer div
* % mod
* \^ power
* \! factorial
### condition opr
* ?: eg a\>b ? c:d
## control structure
* if elif else
* for
* while
* break
* continue
* return
## builtin function
* print(formatStr,arg1,...)
* random(), random(n)
# Grammar
```scala
program = body "."
body = {varDeclaration ";" | constDeclaration ";" | "func" ident "(" arg_list ")" body ";"} sentence
varDeclaration = "var" varIdent { "," varIdent}
varIdent = ident ["=" number] | ident { "[" number "]" }
constDeclaration = "const" ident "=" number {"," ident "=" number}
sentence = [ ident ":=" { ident ":=" } sentenceValue
| "begin" sentence { ";" sentence} "end"
| "if" sentenceValue "then" sentence {"elif" sentence} ["else" sentence]
| "while" sentenceValue "do" sentence
| "do" sentence "while" sentenceValue
| "switch" sentenceValue {"case" sentenceValue {"," sentenceValue} ":" [sentenceValue]} (* ["default" ":" sentenceValue] to do *)
| "break"
| "continue"
| ["return"] sentenceValue
| "print" "(" str,real_arg_list ")" ]
sentenceValue = condition
arg_list = ident { "," ident}
real_arg_list = sentenceValue {"," sentenceValue }
condition = condition_or [ "?" sentenceValue ":" sentenceValue ]
condition_or = condition_and { "||" condition_and }
condition_and = condition_not { "&&" condition_not }
condition_not = {"!"} condition_unit
condition_unit = ["odd"] expression
| expression ("<" | ">" | "<=" | ">=" | "=" | "!=") expression
expression = level1 { ("<<"| ">>" | "&" | "|") level1 }
level1 = level2 { ( "+" | "-" ) level2 }
level2 = level3 { ( "*" | "/" | "/%" | "%" ) level3 }
level3 = level4 {"^" level4}
level4 = item {"!"} (* factorial *)
item = number|"true"|"false" | ident { "(" real_arg_list ")" }| "(" sentenceValue ")" | ("+" | "-" | "~" ) item
```
## syntax
Write down the syntax, then convert left recursion to right recursion.
Namely we should change the following productions:
expr, level0, level, level3
We notice that
```scala
A -> Aa|b
```
is equivalent to
```scala
A -> bR
R -> nil | aR
```
so here are the right-recursion productions
```scala
expr -> level1 interval1
interval1 -> nil | {&|'|'|>>|<<} level1 interval1
level1 -> level2 interval2
interval2 -> nil | {+|-} level2 interval2
level2 -> level3 interval3
interval3 -> nil | {*|/|/%|%} level3 interval3
level3 -> level4 | level4 ^ level3
level4 -> item interval4
interval4 -> nil |! interval4
item -> NUM|E|PI|ln(expr)|(expr)| + item| - item| ~ item
```
When implementing the parser, we can use a loop structure to implement the right recursion because it's tail-recursive.
For instance, we can simply find that the production for `level4` is
```scala
level4 -> item | item ! | item!! |item !!! | ...
```
Though we can't write a production with infinite loops, we can write it in code like this:
```python
match_level4():
    result = match(item)
    while lookAhead matches "!":
        match("!")
        result = factorial(result)
    return result
```
# Instruction generation
We designed several instructions that can be generated for the target machine.
To simplify this problem, we will emulate this virtual machine and execute instructions in python.
## register
This machine has three registers:
* `b` is the base register that contains the base pointer to locate a variable in the data stack
* `regs` are a series of registers. Currently the first one is used for returning value of latest function call, and the second one is used to store the `switch` value
* `pc` is the pc register that points to the instruction
## stack
There are two stacks in this virtual machine.
One contains the instructions and is accessed through the register `pc`. It doesn't change while instructions are executed, so we can treat it as read-only.
The other is the data stack. It changes dynamically while the program runs.
For each level, the first place holds the base address of this level. The second place is the static chain used to reach the upper level's variables. The third place contains the return address of the upper level.
The remaining places in a level contain local variables and intermediate data used for calculation.
![](src/data_stack.jpg)
Each time we call a function, the level increases 1. Also, the level decreases 1 when we return from a function.
## instruction
Every instruction consists of three parts. The first is the name of the instruction. The second is generally the level difference of an identifier (if the instruction refers to one). The third is the address.
name | levelDiff | address | explanation
:-:|:-:|:-:|:-:
INT|0|n|allocate n space for one level
INT|1|n| rewind stk.top backward n steps
INT|2|n| print the top n elements of stack
LIT|-|constant value| push a constant value to the top of the data stack
LOD | levelDiff|addr | load a variable's value onto the top of the data stack. The variable is located using levelDiff and addr
STO|levelDiff|addr| store the stack top value to a varible, top decreases.
CAL|levelDiff|addr|call a function
JMP |-|addr|jmp to addr, namely set addr to pc
JPC|-|addr| pop stack, if the value is not True, jmp addr
MOV|n1|n2| stk[top-n2] = stk[top-n1]
RET|-|-| return to the upper level, use current level's first three value to change pc, data stack, base register.
POP|-|-| pop the data stack, store the value in `reg` register
PUSH|-|-| push `reg` to stack top
OPR|-|operator type| various operations on values
# Design
We can generate instructions while analysing the grammar.
The key points are the instruction translations of the control structures.
## if elif else
![](src/elseif_ins_stack.jpg)
## while/break
![](src/while_ins_stack.jpg)
`continue`, `for` can be translated in the same way.
## switch
eg
```c
switch n
case 1,2:print('1 or 2')
case 1+5:print('6')
case func_add(1,6):print('7')
;
```
## function arguments pass
When analysing the function's definition, we can store the formal arguments as the function's local variables.
As soon as we call this function, we should calculate the real arguments in the level above the function, and then pass the values to the function's formal variables one by one.
I use an instruction `MOV` to achieve this goal. `MOV n1, n2` copies the value stk[top-n1] into stk[top-n2], as listed in the instruction table.
Let's have a look at how to call a function and pass args value.
Before we call a function, its real args will be calculated in the level upper this function. Note function level is n+1, and we call this function in level n.
In level n, we calculate the function's args; all values are stored in the data stack of level n. Now we call the function and enter it. The data stack reaches level n+1 and grows by three places for `DL`, `SL` and `RA`. The following places are for the function's local variables. So we can move level n's real argument values to these places according to the function's number of arguments and number of variables.
For example, if a function has n1 args and n2 local variables (excluding args), then
```python
for i in [0,1..,n1-1]:
mov , n2+n1+3+i, n2 + i
```
The moment we returned level n, we should rewind top for n1 spaces, `OPR,n1,'BACK'` can make it.
![](src/argument_pass.jpg)
## function return
Also, mark function level as n+1, and outer(upper) is level n.
To implement `return` sentence, we just need to do two things:
* calculate `return` sentence value **in level n+1**
* pass this value to level n
It seems that it's hard to pass level n+1 's value to level n. Once we returned to level n, level n+1 's data in data stack will be cleared.
I use an extra register `reg` to achieve this. Before we return,
* calculate return value
* `OPR ,0,'POP'` will pop the value and store it in reg
* return level n
* `OPR,0,'PUSH'` will push reg value to stack top
Now the return value has been passed from level n+1 to level n
![](src/return_value.jpg)
## instruction backpatching
Take a `while` block as an example. We don't know the `JPC` instruction's target address until we finish analysing the whole block. The solution is that after we analyse the while condition, we generate an instruction with no target address, just as a placeholder, and note down this instruction's address. As soon as we finish analysing the whole `while` block, the instruction pointer, namely `ip`, points to the target address of `JPC`. Then we backpatch the `JPC` instruction with the current value of `ip`.
## symbol table
When analysing and translating, we want to look up a symbol (its level, address, and value for a constant) by its name. The following shows how to achieve this elegantly.
There are three types of symbols:
* constant
* variable
* function name
Every function has an environment that contains this level's symbols, and an outer environment (except for the main function). Every environment can hold the three kinds of symbols mentioned above.
By default, we are in the main function at the beginning of the program.
In an environment, when we meet a symbol, we look it up in the current environment. If it is not found, we search the outer environments recursively until we find it.
This guarantees that one environment never has the same name for different symbols, while different environments may reuse the same names.
So there won't be conflicts when different functions have local variables or arguments with the same names.
I create the class `closure` to describe this kind of environment and the variable `curClosure` to record the current environment. Every time we call a function, we enter a more inner environment. We do the following things to make sure that the environment changes correctly.
```python
saved = curClosure
curClosure = function.closure
call function
curClosure = saved
```
## builtin function--print
This function is just like function `printf` in clang.
Call it in the following format:
`print(FORMAT[,arg1,arg2...])`
The format string supports two kinds of format currently:
* `%d`: integer
* `%f`: float
If you want to print a raw `%d` without formatting, add a backslash `\` in front of `%` (the same applies to `%f`).
For example:
```python
>> print('a=%d, % \%d',1)
a=1, % %d
```
To implement this builtin function, we first parse the format string and generate segments separated by `%d` or `%f`.
For instance, `'fib[%d]=%d'` generates segs `['fib[','%d',']=','%d']`.
For every literal segment, we generate one instruction `('LIT',0,c)` for each character `c` in the segment.
If the segment is `%d` or `%f`, we first match a comma, then parse the following value and generate its instructions. At runtime, after executing these instructions, we get one value (it occupies only one data-stack unit).
After handling all segs, we generate an instruction `('INT',2,n)`, which prints the top n units of the data stack and sets stk.top = stk.top-n.
`n` is calculated by summing the lengths of all literal segments and the number of format segments.
# To do
- [ ] array
- [ ] different value pass
- [ ] function pass
- [ ] type
- [ ] struct

View File

@ -0,0 +1,835 @@
'''
#########################################################################
# File : parser.py
# Author: mbinary
# Mail: zhuheqin1@gmail.com
# Blog: https://mbinary.xyz
# Github: https://github.com/mbinary
# Created Time: 2018-11-04 19:50
# Description:
#########################################################################
'''
import sys
import argparse
from math import e,pi,log
from random import randint
from functools import reduce
from token_scanner import gen_token,Token
from operator import eq,ge,gt,ne,le,lt, not_,and_,or_,lshift,rshift, add,sub,mod,mul,pow,abs,neg
# Command line interface.  NOTE: the name `parser` is rebound further down in
# this file by `class parser`, so the ArgumentParser is only used right here.
parser = argparse.ArgumentParser()
parser.add_argument('-i','--instruction',action='store_true',help="output instructions")
parser.add_argument('-s','--stack',action='store_true',help="output data stack when executing each instruction")
parser.add_argument('-t','--token',action='store_true',help="output tokens when parsing")
parser.add_argument('-v','--varible',action='store_true',help="output varibles for every static environment")
parser.add_argument('-f','--file',type=str,help="compile and run codes. \n Without this arg, enter interactive REPL")
args = parser.parse_args()

# Switches read by the parser classes below while they work.
FILE,SHOWINS,SHOWSTACK,SHOWVAR,SHOWTOKEN = (
    args.file,args.instruction,args.stack,args.varible,args.token)

# Fixed tokens that the grammar routines pass to match().
WHILE,THEN,ELSE,DO,END = (
    Token('NAME',word) for word in ('while','then','else','do','end'))
ASSIGN = Token('ASSIGN',':=')
EQ = Token('EQ','=')
LEFT = Token('LEFT','(')
RIGHT = Token('RIGHT',')')
COMMA = Token('COMMA',',')
SEMICOLON = Token('SEMICOLON',';')
PERIOD = Token('PERIOD','.')
COLON = Token('COLON',':')
class symbol:
    '''One symbol-table entry: a constant, a variable or a function name.

    name  -- identifier text
    type  -- kind of symbol; 'FUNC' and 'VAR' get their own text form in
             __str__, every other kind is printed as name=value
    value -- stored value (may be None)
    level -- level recorded for the symbol (may be None)
    addr  -- address recorded for the symbol (may be None)
    '''
    def __init__(self,name,varType,value=None,level=None,addr = None):
        self.name,self.type = name,varType
        self.value,self.level,self.addr = value,level,addr

    def __str__(self):
        '''Short human readable form; its layout depends on the symbol kind.'''
        kind = self.type
        if kind=='FUNC':
            return "({}, {}, {})".format(kind,self.name,self.addr)
        if kind=='VAR':
            return "({}, {}={}, {})".format(kind,self.name,self.value,self.addr)
        return "({}, {}={})".format(kind,self.name,self.value)

    def __repr__(self):
        '''Constructor-like form listing all five fields.'''
        fields = (self.name,self.type,self.value,self.level,self.addr)
        return "symbol('{}','{}',{},{},{})".format(*fields)
class stack:
    '''Stack emulated on top of a pre-allocated list.

    `lst` is copied and padded with zeros up to `size` cells.  `top` is the
    index of the current top cell; it starts at 0, so the first pushed value
    lands in cell 1.
    '''
    def __init__(self,lst,size=1000):
        cells = lst.copy()
        cells.extend([0]*(size-len(lst)))
        self.lst = cells
        self.top = 0

    def push(self,val):
        '''Advance `top` and store val there; raises when the cells run out.'''
        self.top = self.top+1
        if not self.top<len(self.lst):
            raise Exception('[Error]: data stack overflow')
        self.lst[self.top] = val

    def pop(self):
        '''Lower `top` by one and return the value it pointed at before.'''
        previous = self.top
        self.top = previous-1
        return self.lst[previous]

    def __setitem__(self,k,val):
        self.lst[k] = val

    def __getitem__(self,k):
        return self.lst[k]

    def __str__(self):
        return str(self.lst)

    def __repr__(self):
        return 'stack({})'.format(self.lst)
class instruction:
    '''One generated instruction: a name, a level difference and an address.'''
    def __init__(self,name,levelDiff,addr):
        self.name,self.levelDiff,self.addr = name,levelDiff,addr

    def __str__(self):
        '''Text form "NAME levelDiff addr"; the name is padded to four columns
        and a string address is shown through repr() so its quotes and escapes
        stay visible.'''
        shown = repr(self.addr) if type(self.addr)==str else self.addr
        return '{} {} {}'.format(self.name.ljust(4),self.levelDiff,shown)
class closure:
    '''Environment of one function: a dict of symbols plus a link to the
    enclosing environment.

    items  -- name -> symbol mapping of this environment only
    outer  -- enclosing closure, or None for the outermost one
    varNum -- number of symbols of type 'VAR' stored through `self[key] = sym`
    '''
    def __init__(self,items=None,outer=None):
        self.outer = outer
        self.items = dict() if items is None else items
        self.varNum = 0

    def __getitem__(self,key):
        '''Look key up here, then outwards; returns None when nobody has it.'''
        env = self
        while env is not None:
            if key in env.items:
                return env.items[key]
            env = env.outer
        return None

    def __setitem__(self,key,val):
        '''Define key in THIS environment; redefinition is an error.'''
        if key in self.items:
            raise Exception('[Error]: {} has been defined'.format(key))
        if val.type=='VAR':
            self.varNum += 1
        self.items[key] = val

    def __contains__(self,key):
        # membership looks at this environment only, not at the outer chain
        return key in self.items

    def __iter__(self):
        return iter(self.items.values())

    def __repr__(self):
        return '\n'.join(str(entry) for entry in self.items.values())
class parser(object):
    '''Recursive-descent parser skeleton shared by concrete grammars.

    The class owns the token list, the symbol environments and the list of
    generated instructions.  A subclass supplies the grammar (`program`), the
    executing emulator (`interpret`) and its reserved words (`reserved`).
    '''
    # Upper-case reserved words consulted by isType(); a subclass replaces
    # this with its own set.  The empty default keeps isType() usable here.
    reserved = frozenset()

    def __init__(self,tokens=None,syms=None,codes=None):
        '''tokens -- token list to parse (may also be given to parse())
        syms   -- initial symbols, see initSymbol()
        codes  -- accepted for interface compatibility only: the instruction
                  list always starts empty and parse() rebuilds it anyway
        '''
        self.tokens = [] if tokens is None else tokens
        self.pointer = 0
        self.level = 0
        self.ip=0
        self.codes=[]
        self.initSymbol(syms)

    def initSymbol(self,syms=None):
        '''Create the outermost environment and fill it with syms
        (default: the constants E and PI).'''
        if syms is None: syms=[symbol('E','CONST',e,0),symbol('PI','CONST',pi,0)]
        self.closure=closure()
        self.curClosure = self.closure
        for i in syms:
            self.addSymbol(i.name,i.type,i.value)

    def addSymbol(self,var,varType,value=None):
        '''Define var in the current environment and return its symbol.  The
        address is the environment's current variable count plus 3.'''
        sym = symbol(var,varType,value,self.level,self.curClosure.varNum+3)
        self.curClosure[var]=sym
        return sym

    def getSymbol(self,var):
        '''Resolve var through the environment chain; raises when undefined.'''
        sym = self.curClosure[var]
        if sym is None:
            self.errorDefine(var)
        return sym

    def genIns(self,f,l,a):
        '''Append instruction (f,l,a) and return the index it was stored at.'''
        self.codes.append(instruction(f,l,a))
        self.ip+=1
        return self.ip-1

    def errorInfo(self):
        '''when parsing codes and encountering error,
        print whole line in which this error is
        and print error information

        Returns the token the caret points at.  When the error happened at the
        end of the input (pointer == len(tokens)) the last line is printed,
        the caret is placed just behind it and the last token is returned.
        Returns None without printing when there are no tokens at all.
        '''
        def tkstr(tk):
            if tk.type=='STR':return repr(tk.value)
            return str(tk.value)
        n = len(self.tokens)
        if n==0:
            return None
        # Running out of tokens is the usual reason for a syntax error, so
        # the pointer must not be used as an index without this check.
        atEnd = self.pointer>=n
        pos = n if atEnd else self.pointer
        tk = self.tokens[n-1] if atEnd else self.tokens[pos]
        lineno = tk.lineNum
        a = min(pos,n-1)
        b = pos
        while a>=0 and self.tokens[a].lineNum == lineno:
            a -=1
        while b<n and self.tokens[b].lineNum == lineno:
            b +=1
        s1 = ' '.join([tkstr(t) for t in self.tokens[a+1:pos]])
        s2 = ' '.join([tkstr(t) for t in self.tokens[pos:b]])
        print('line {}: {} {}'.format(lineno,s1,s2))
        # the caret is as wide as the token text that was printed above it
        width = 1 if atEnd else len(tkstr(tk))
        print(' '*(len(s1)+8+len(str(lineno)))+'^'*width)
        return tk

    def errorIns(self,ins,pc):
        print('[Error]: Unknown instruction {}: {} '.format(pc,ins))

    def errorDefine(self,var):
        raise Exception('[Error]: "{}" is not defined'.format(var))

    def errorArg(self,n1,n2):
        raise Exception('[Error]: Expected {} args, but {} given'.format(n1,n2))

    def errorExpect(self,s):
        '''Raise "Expected s, got <lookahead>".  At the end of the input the
        lookahead is reported as "$", the value isType() uses for EOF.'''
        if self.pointer<len(self.tokens):
            got = self.tokens[self.pointer].value
        else:
            got = '$'
        raise Exception('[Error]: Expected {}, got "{}"'.format(s,got))

    def errorLoop(self,s):
        raise Exception('[Error]: "{}" outside loop'.format(s))

    def match(self,sym=None):
        '''Consume and return the lookahead token.

        sym -- None accepts any token; a NUM token accepts any number;
               otherwise the lookahead has to equal sym.
        Raises the usual "Expected ..." error on a mismatch and also when the
        input is exhausted (this used to surface as an IndexError).
        '''
        if self.pointer>=len(self.tokens):
            self.errorExpect('more input' if sym is None else '"'+sym.value+'"')
        cur = self.tokens[self.pointer]
        if SHOWTOKEN:
            print(cur)
        if sym is None \
                or (sym.type=='NUM' and self.isType('NUM')) \
                or sym==cur:
            self.pointer+=1
            return cur
        self.errorExpect('"'+sym.value+'"')

    def parse(self,tokens=None):
        '''parse codes from tokens, then generate instructions and execute them'''
        self.ip=0
        self.codes=[]
        self.pointer=0
        if tokens is not None: self.tokens = tokens
        if self.tokens is None:return
        # Remembered so that an error raised in the middle of a function
        # definition cannot leave the parser inside that function's scope.
        startLevel = self.level
        startClosure = self.curClosure
        try:
            self.program()
            if SHOWINS:
                print(' ins i a')
                for i,ins in enumerate(self.codes):print(str(i).ljust(4),ins)
            if self.pointer != len(self.tokens):
                raise Exception ('[Error]: invalid syntax')
        except Exception as e:
            self.level = startLevel
            self.curClosure = startClosure
            self.errorInfo()
            print(e)
            return
        result =self.interpret()
        # copy the final values back into the outermost variables
        for sym in self.closure:
            if sym.type=='VAR':
                sym.value = result[sym.addr-3]
        # whatever follows the variables in the result is printed
        res = result[self.closure.varNum:]
        if res!=[]: print('result: ',end='')
        for i in res:
            print(i,end='; ')
        if res!=[]: print()

    def isType(self,s):
        '''judge the lookahead symbol'''
        if self.pointer == len(self.tokens):sym = Token('EOF','$')
        else: sym = self.tokens[self.pointer]
        # a reserved word is recognised by its lower-case spelling
        if s in self.reserved: return sym.value==s.lower()
        # a reserved word never counts as a NAME
        if s =='NAME' and sym.value.upper() in self.reserved: return False
        return sym.type ==s

    def isAnyType(self,lst):
        '''True when the lookahead is of any type listed in lst.'''
        return any([self.isType(i) for i in lst])

    def wantType(self,s):
        '''Raise the "Expected ..." error unless the lookahead is of type s.'''
        if not self.isType(s): self.errorExpect(s)

    def backpatching(self,ip,addr,levelDiff=None):
        '''Fill in the address (and optionally the level difference) of the
        instruction stored at index ip.'''
        self.codes[ip].addr= addr
        if levelDiff is not None:self.codes[ip].levelDiff=levelDiff

    def program(self):
        '''the begining of a grammar, to implement'''
        pass

    def interpret(self):
        '''the code executing emulator'''
        pass
class PL0(parser):
def __init__(self,tokens=None,syms=None,codes=None,level=0):
    '''init pc, closure, reserved keywords, operators

    tokens, syms, codes -- handed on to parser.__init__
    level               -- level the parser starts at
    '''
    # These arguments used to be dropped (the base initialiser was called
    # without them), so a PL0 built from tokens or symbols started out empty.
    super().__init__(tokens,syms,codes)
    self.level = level
    self.reserved={'FUNC','PRINT','RETURN','BEGIN','END','IF','THEN','FOR','ELIF','ELSE','WHILE','DO','BREAK','CONTINUE','VAR','CONST','ODD','RANDOM','SWITCH','CASE','DEFAULT'}
    # every reserved word except ODD
    self.bodyFirst= self.reserved.copy()
    self.bodyFirst.remove('ODD')
    # Operator tables, keyed by the operator name used in OPR instructions.
    self.relationOPR= {'EQ':eq,'NEQ':ne,'GT':gt,'LT':lt,'GE':ge,'LE':le} # odd
    # NOTE(review): and_/or_ are the bitwise operators of the `operator`
    # module; confirm the emulator only feeds them booleans.
    self.conditionOPR = {'AND':and_,'OR':or_, 'NOT':not_}
    self.conditionOPR.update(self.relationOPR)
    self.arithmeticOPR = {'ADD':add,'SUB':sub,'MOD':mod,'MUL':mul,'POW':pow,'DIV':lambda x,y:x/y,'INTDIV':lambda x,y:round(x)//round(y) }
    # bit operators work on integers, so operands are rounded first
    self.bitOPR = {'LSHIFT':lambda x,y:round(x)<<round(y),'RSHIFT':lambda x,y:round(x)>>round(y),'BITAND':lambda x,y:round(x)&round(y), 'BITOR':lambda x,y:round(x)|round(y),'BITNOT':lambda x:~round(x)}
    # binaryOPR = all two-operand operators (NOT and BITNOT are unary)
    self.binaryOPR = dict()
    self.binaryOPR.update(self.conditionOPR)
    del self.binaryOPR['NOT']
    self.binaryOPR.update(self.arithmeticOPR)
    self.binaryOPR.update(self.bitOPR)
    del self.binaryOPR['BITNOT']
    # 'RND' rounds its bound like the other integer operators do: number
    # literals are emitted as floats and random.randint() rejects float
    # arguments on current Python versions.
    self.unaryOPR = {'NEG':neg,'NOT':not_,'BITNOT':lambda x:~round(x),'FAC':lambda x:reduce(mul,range(1,round(x)+1),1),'ODD':lambda x:round(x)%2==1, 'RND':lambda x:randint(0,round(x)),'INT':round}#abs
def program(self):
    '''program = body "."

    Emits two placeholder instructions first (INT at index 0, JMP at index
    1), parses the body, then fills both in: INT gets the outermost variable
    count plus 3, JMP gets the instruction index returned by body().  A final
    RET is emitted after the closing period.
    '''
    self.enableJit = False
    for placeholder in ('INT','JMP'):
        self.genIns(placeholder,0,None)
    entry = self.body()
    frameSize = self.curClosure.varNum+3
    self.backpatching(0,frameSize)
    self.backpatching(1,entry)
    self.match(PERIOD)
    self.genIns('RET',0,0)
# body = { declarations } sentence
# Parses any number of const/var declarations and function definitions, then
# emits the variable initialisers and parses one sentence.
# Returns the instruction index at which the code following the declarations
# starts (the value of self.ip right after the declaration loop).
def body(self):
while 1:
# --- "const" / "var" name [= [-]number] {, name ...} ;
if self.isType('CONST') or self.isType('VAR'):
# the keyword itself, upper-cased, becomes the symbol type
tp = self.match().value.upper()
while 1:
self.wantType('NAME')
name = self.match().value
val = None
if self.isType('EQ'):
self.match(EQ)
# optional sign in front of the initial number
minus = False
if self.isType('SUB'):
self.match()
minus=True
self.wantType('NUM')
val = float(self.match().value)
if minus: val = -val
self.addSymbol(name,tp,val)
# ';' ends the declaration, anything else has to be ','
if self.isType('SEMICOLON'):
self.match()
break
self.match(COMMA)
# --- "func" name ( arg_list ) body ;
elif self.isType('FUNC'):
self.match()
self.wantType('NAME')
name = self.match().value
args = self.arg_list()
# the symbol's value is the index of the function's first instruction
sym = self.addSymbol(name,'FUNC',self.ip)
self.level +=1
# from here on symbols are defined in the function's own environment
sym.closure=closure(outer=self.curClosure)
self.curClosure = sym.closure
# INT placeholder, patched below once the variable count is known
beginIp = self.genIns( 'INT',0,None)
narg = len(args)
sym.argNum = narg
ips=[]
# every formal argument becomes a VAR plus one MOV placeholder
for arg in args:
self.addSymbol(arg,'VAR')
ips.append(self.genIns('MOV',None,None))
self.body()
nvar = self.curClosure.varNum
self.curClosure = self.curClosure.outer
# span1 = number of variables that are not arguments,
# span2 = 3 + number of all variables
span1 = nvar -narg
span2 = 3+nvar
# the i-th MOV gets addr = span1+i and levelDiff = span2+i
for i ,ip in enumerate(ips):
self.backpatching(ip,span1+i,span2+i)
self.match(SEMICOLON)
self.backpatching(beginIp,nvar+3)
self.level -=1
self.genIns('RET',0,0)
else:break
ret = self.ip
if SHOWVAR:
print('level: {}'.format(self.level))
print(self.curClosure)
print()
# every variable of this environment that has a value is initialised
# with a LIT / STO pair
for sym in self.curClosure:
if sym.type=='VAR' and sym.value is not None:
self.genIns('LIT',0,sym.value)
self.genIns('STO',0,sym.addr)
# the sentence may be missing when the period follows directly
if not self.isType('PERIOD'):
# pending `return` jumps are pointed at the instruction index reached
# after the sentence
for ip in self.sentence()['RETURN']:
self.backpatching(ip,self.ip)
return ret
def arg_list(self):
    '''"(" [ NAME { "," NAME } ] ")"  --  formal argument names.

    Returns the list of names in source order (empty for "()").
    '''
    self.match(LEFT)
    names = []
    if not self.isType('RIGHT'):
        while True:
            self.wantType('NAME')
            names.append(self.match().value)
            if not self.isType('COMMA'):
                break
            self.match()
    self.match(RIGHT)
    return names
def real_arg_list(self):
    '''"(" [ sentenceValue { "," sentenceValue } ] ")"  --  actual arguments.

    The code of every argument is emitted by sentenceValue(); the return
    value is the number of arguments parsed.
    '''
    self.match(LEFT)
    count = 0
    if not self.isType('RIGHT'):
        while True:
            self.sentenceValue()
            count += 1
            if not self.isType('COMMA'):
                break
            self.match()
    self.match(RIGHT)
    return count
def sentence_list(self,outerLoop=None):
    '''sentence { ";" sentence }

    outerLoop is handed to every sentence unchanged.  Returns the dict of the
    first sentence with the BREAK / CONTINUE / RETURN sets of all following
    sentences merged into it.
    '''
    merged = self.sentence(outerLoop)
    while self.isType('SEMICOLON'):
        self.match()
        following = self.sentence(outerLoop)
        for kind in ('BREAK','CONTINUE','RETURN'):
            merged[kind] = merged[kind] | following[kind]
    return merged
def formatStr(self,s):
    '''Split a print format string into segments.

    Returns a list of strings in source order.  A segment equal to '%d' or
    '%f' is a format specifier; every other segment is literal text.

    An escaped percent sign (a backslash directly in front of '%') is dropped
    together with its backslash and returned as a segment '%' of its own.
    That way the text that follows (e.g. the 'd' of an escaped "%d") can never
    be mistaken for a format specifier by the caller.  Empty literal segments
    are not emitted.
    '''
    n = len(s)
    segs = []
    last = 0        # start of the literal text not yet emitted

    def flush(end):
        # emit the pending literal s[last:end] unless it is empty
        if end>last:
            segs.append(s[last:end])

    i = 0
    while i<n:
        if s[i]=='%':
            if i>0 and s[i-1]=='\\':
                flush(i-1)              # literal text without the backslash
                segs.append('%')
                last = i+1
            elif i+1<n and s[i+1] in 'df':
                flush(i)
                segs.append('%{}'.format(s[i+1]))
                last = i+2
                i += 1                  # skip the conversion character
        i += 1
    flush(n)
    return segs
def sentence(self,outerLoop=None):
    '''Parse one sentence and emit its instructions.

    outerLoop -- None when the sentence is not inside a loop (then `break`
                 and `continue` are errors); loop bodies are parsed with 1.

    Returns a dict with the keys 'BREAK', 'CONTINUE' and 'RETURN'.  Each value
    is the set of indices of JMP placeholders that still need a target and
    have to be backpatched by the caller.  A loop patches the BREAK and
    CONTINUE jumps of its own body and returns them as empty sets.
    '''
    ret ={'BREAK':set(),'CONTINUE':set(),'RETURN':set()}

    def merge(dic):
        # fold the pending jumps of a nested sentence into ours
        for kind in ('BREAK','CONTINUE','RETURN'):
            ret[kind] = ret[kind].union(dic[kind])

    if self.isType('BEGIN'):
        self.match()
        ret = self.sentence_list(outerLoop)
        self.match(END)
    elif self.isType('PRINT'):
        self.match()
        self.match(LEFT)
        if not self.isType('RIGHT'):
            self.wantType('STR')
            s = self.match().value
        else:s=''
        segs= self.formatStr(s)
        n = 0       # number of format segments
        for seg in segs:
            if seg in ['%d','%f']:
                self.match(COMMA)
                self.sentenceValue()
                if seg=='%d': self.genIns('OPR',1,'INT')#type convert
                n +=1
            else:
                # literal text is pushed one character at a time
                for i in seg: self.genIns('LIT',0,i)
        self.genIns('LIT',0,'\n')
        # one unit per literal character, one per formatted value (a format
        # segment is two characters long, hence "-n"), one for the newline
        unitNum = sum(len(i) for i in segs) -n +1
        self.genIns('INT',2,unitNum)
        self.match(RIGHT)
    elif self.isType('BREAK'):
        if outerLoop is None: self.errorLoop('break')
        self.match()
        ret['BREAK'].add(self.genIns('JMP',0,None))
    elif self.isType('CONTINUE'):
        self.match()
        if outerLoop is None: self.errorLoop('continue')
        ret['CONTINUE'].add(self.genIns('JMP',0,None))
    elif self.isType('IF'):
        self.match()
        self.sentenceValue()
        self.match(THEN)
        # jpcIp: the jump taken when the most recent condition fails
        jpcIp = self.genIns('JPC',0,None)
        ret = self.sentence(outerLoop)
        # jmpIps: jumps at the end of each branch, all leaving the whole if
        jmpIps = []
        while self.isType('ELIF'):
            self.match()
            ip = self.genIns('JMP',0,None)
            jmpIps.append(ip)
            self.backpatching(jpcIp,self.ip)
            self.sentenceValue()
            jpcIp = self.genIns('JPC',0,None)
            self.match(THEN)
            merge(self.sentence(outerLoop))
        if self.isType('ELSE'):
            self.match()
            ip = self.genIns('JMP',0,None)
            jmpIps.append(ip)
            self.backpatching(jpcIp,self.ip)
            merge(self.sentence(outerLoop))
        else:
            self.backpatching(jpcIp,self.ip)
        for ip in jmpIps:
            self.backpatching(ip,self.ip)
    elif self.isType('SWITCH'):
        self.match()
        self.sentenceValue()
        # the switch value is kept through POP / PUSH with address 1
        self.genIns('POP',0,1)
        while self.isType('CASE'):
            self.match()
            self.genIns('PUSH',0,1)
            self.sentenceValue()
            self.genIns('OPR',2,'EQ')
            # Any number of further values may follow, each OR-ed into the
            # test.  (Only a single extra value used to be accepted.)
            while self.isType('COMMA'):
                self.match()
                self.sentenceValue()
                self.genIns('PUSH',0,1)
                self.genIns('OPR',2,'EQ')
                self.genIns('OPR',2,'OR')
            jpcIp = self.genIns('JPC',0,None)
            self.match(COLON)
            if not self.isType('CASE'):
                # The pending jumps of the case body have to reach our
                # caller: they used to be dropped, which left the JMP of a
                # `return` inside a case without a target.  Passing
                # outerLoop on lets `break` / `continue` inside a case act
                # on the enclosing loop, as they do inside an `if`.
                merge(self.sentence(outerLoop))
            self.backpatching(jpcIp,self.ip)
        #if self.isType('DEFAULT'):
        #    self.match()
        #    self.match(COLON)
        #    self.sentence()
    elif self.isType('DO'):
        self.match()
        beginIp = self.ip
        ret = self.sentence(1)
        self.match(WHILE)
        self.sentenceValue()
        jpcIp = self.genIns('JPC',0,None)
        self.genIns('JMP',0,beginIp)
        self.backpatching(jpcIp,self.ip)
        for jmpip in ret['BREAK']:
            self.backpatching(jmpip,self.ip)
        # NOTE(review): `continue` jumps to the start of the body, i.e. it
        # skips the loop condition -- confirm that this is intended.
        for jmpip in ret['CONTINUE']:
            self.backpatching(jmpip,beginIp)
        # These jumps are resolved now.  Handing them on would let an
        # enclosing loop patch them again with ITS targets.
        ret['BREAK'] = set()
        ret['CONTINUE'] = set()
    elif self.isType('WHILE') or self.isType('FOR'):
        tp = self.match()
        beginIp = jpcIp =None
        if tp.value=='while':
            beginIp = self.ip
            self.sentenceValue()
            jpcIp = self.genIns('JPC',0,None)
            self.match(DO)
        else:
            # for ( [assignment] ; [condition] ; [assignment] ) sentence
            self.match(LEFT)
            if not self.isType('SEMICOLON'):
                self.assignment()
            self.match(SEMICOLON)
            beginIp = self.ip
            if not self.isType('SEMICOLON'):
                self.sentenceValue()
                jpcIp = self.genIns('JPC',0,None)
            self.match(SEMICOLON)
            # NOTE(review): the step is emitted here, in front of the body,
            # so at run time it executes before the body in every iteration.
            if not self.isType('RIGHT'):
                self.assignment()
            self.match(RIGHT)
        ret = self.sentence(1)
        self.genIns('JMP',0,beginIp)
        # a `for` without a condition has no conditional jump to patch
        if jpcIp is not None:
            self.backpatching(jpcIp,self.ip)
        for jmpip in ret['BREAK']:
            self.backpatching(jmpip,self.ip)
        for jmpip in ret['CONTINUE']:
            self.backpatching(jmpip,beginIp)
        # resolved here, must not be patched again by an enclosing loop
        ret['BREAK'] = set()
        ret['CONTINUE'] = set()
    elif self.isType('RETURN'): # return sentence
        self.match()
        self.sentenceValue()
        self.genIns('POP',0,0)
        ret['RETURN'].add(self.genIns('JMP',0,None))
    elif self.isAnyType(['SEMICOLON','END','ELSE']):pass # allow blank sentence: namely ; ;;
    elif self.isAssignment() : # this must be the last to be checked in sentences
        self.assignment()
    else:
        self.sentenceValue()
    return ret
def funcall(self):
    '''NAME real_arg_list  --  call of a user-defined function.

    Emits the code of the arguments, then CAL (level difference to the
    function symbol, function entry), INT 1 n (n = number of arguments) and
    PUSH 0 0.

    Raises when the name is not a function or when the number of arguments
    differs from the function's definition.
    '''
    name = self.match().value
    sym = self.getSymbol(name)
    if sym.type!='FUNC':
        # used to fail with an AttributeError on the missing `closure`
        raise Exception('[Error]: "{}" is not a function'.format(name))
    # The arguments are parsed in the CALLER's environment.  Switching to the
    # callee's environment here made an identifier such as `n` in `f(n)`
    # resolve to the callee's own `n` instead of the caller's variable.
    n2= self.real_arg_list()
    if sym.argNum!=n2:
        self.errorArg(sym.argNum,n2)
    self.genIns('CAL',abs(self.level-sym.level),sym.value)
    self.genIns('INT',1,n2)
    self.genIns('PUSH',0,0)
# sentenceValue = condition
# Entry point for everything that is parsed as a value: it simply delegates to
# condition().
def sentenceValue(self):
self.condition()
def isAssignment(self):
    '''True when the lookahead is a NAME directly followed by ":=".

    Looks one token past the lookahead, so the end of the token list is
    checked before that token is read.
    '''
    if not self.isType('NAME'):
        return False
    following = self.pointer+1
    return following<len(self.tokens) and self.tokens[following]==ASSIGN
def assignment(self):
    '''NAME ":=" { NAME ":=" } sentenceValue  --  (chained) assignment.

    The value is stored into the first target with STO; every further target
    gets a LOD of the previous target followed by a STO of its own.

    Raises when there is no target at all, when a target is undefined, or
    when a target is not a variable.  Every target is checked, including the
    first one (which used to be written without any check), and the checks
    run before any STO is emitted.
    '''
    varLst = []
    while self.isAssignment():
        varLst .append(self.match().value)
        self.match(ASSIGN)
    if not varLst:
        # e.g. `for(i<3;...)`: used to end in an IndexError further down
        self.errorExpect('an assignment')
    self.sentenceValue()
    targets = [self.getSymbol(var) for var in varLst]
    for sym in targets:
        if sym.type=='CONST':
            raise Exception('[Error]: Const "{}" can\'t be reassigned'.format(sym.name))
        if sym.type!='VAR':
            raise Exception('[Error]: "{}" is not a variable'.format(sym.name))
    sym0 = targets[0]
    lastLevel=abs(self.level-sym0.level)
    lastAddr = sym0.addr
    self.genIns('STO',lastLevel,lastAddr)
    for sym in targets[1:]:
        # STO took the value off the stack, so it is reloaded from the
        # target that was written last
        self.genIns('LOD',lastLevel,lastAddr)
        lastLevel = abs(self.level-sym.level)
        lastAddr = sym.addr
        self.genIns('STO',lastLevel,lastAddr)
def condition(self):
    '''condition_and { OR condition_and } [ "?" sentenceValue ":" sentenceValue ]

    Every OR emits `OPR 2 OR` after its right operand.  The optional "?:"
    part is the conditional expression `condition ? expr1 : expr2`: a JPC
    leads to the code of the second value and a JMP behind the first value
    skips over it.
    '''
    self.condition_and()
    while self.isType('OR'):
        self.match()
        self.condition_and()
        self.genIns('OPR',2,'OR')
    if not self.isType('QUESTION'):
        return
    self.match()
    toSecond = self.genIns('JPC',0,None)
    self.sentenceValue()
    toEnd = self.genIns('JMP',0,None)
    self.match(COLON)
    self.backpatching(toSecond,self.ip)
    self.sentenceValue()
    self.backpatching(toEnd,self.ip)
def condition_and(self):
    '''condition_not { AND condition_not }

    `OPR 2 AND` is emitted after each right operand.
    '''
    self.condition_not()
    while True:
        if not self.isType('AND'):
            return
        self.match()
        self.condition_not()
        self.genIns('OPR',2,'AND')
def condition_not(self):
    '''{ NOT } condition_unit

    Pairs of NOT cancel each other: a single `OPR 1 NOT` is emitted when the
    number of NOT tokens is odd, nothing when it is even.
    '''
    negate = False
    while self.isType('NOT'):
        self.match()
        negate = not negate
    self.condition_unit()
    if negate:
        self.genIns('OPR',1,'NOT')
def condition_unit(self):
    '''ODD expression  |  expression [ relation expression ]

    A bare expression is allowed as a logical value (non-zero is true, zero
    is false).  When a relation operator follows, the `type` of the operator
    token is used as the operator name of the emitted `OPR 2 ...`.
    '''
    if self.isType('ODD'):
        self.match()
        self.expression()
        self.genIns('OPR',1,'ODD')
    else:
        self.expression()
        if self.isAnyType(self.relationOPR):
            relation = self.match().type
            self.expression()
            self.genIns('OPR',2,relation)
def expression(self):
    '''Parse `level1 { (RSHIFT | LSHIFT | BITAND | BITOR) level1 }`.

    The operators are left-associative; each one emits `OPR 2 <operator>`
    after its right operand has been compiled.
    '''
    self.level1()
    operators = ('RSHIFT','LSHIFT','BITAND','BITOR')
    while True:
        for op in operators:
            if self.isType(op):
                break
        else:
            # the current token is none of the operators: expression ends
            return
        self.match()
        self.level1()
        self.genIns('OPR',2,op)
def item(self):
    '''Parse one operand and emit the code that pushes its value.

    Accepted forms: a number, a parenthesised sentence value, a unary
    `-`/`+`/`~` applied to an item, `random(...)`, a function call, the
    names `true`/`false`, a constant or a variable.  Anything else is
    reported through `errorExpect`.
    '''
    if self.isType('NUM'):
        val = float(self.match().value)
        self.genIns('LIT',0,val)
    #elif self.isType('STR'):
    #    val = self.match().value
    #    self.genIns('LIT',0.,val)
    elif self.isType('LEFT'):
        self.match()
        self.sentenceValue()
        self.match(RIGHT)
    elif self.isType('SUB'):
        self.match()
        self.item()
        self.genIns('OPR',1,'NEG')
    elif self.isType('ADD'):
        # unary plus: nothing to emit
        self.match()
        self.item()
    elif self.isType('BITNOT'):
        self.match()
        self.item()
        self.genIns('OPR',1,'BITNOT')
    elif self.isType('RANDOM'):
        self.match()
        self.match(LEFT)
        if self.isType('RIGHT'):
            # no argument given: use 1<<16 as the operand of RND
            self.genIns('LIT',0,1<<16)
        else:
            self.expression()
        self.match(RIGHT)
        self.genIns('OPR',1,'RND')
    elif self.isType('NAME'):
        following = self.pointer+1
        # Bounds check first: a NAME that is the very last token used to
        # raise IndexError here (isAssignment guards the same lookahead).
        if following<len(self.tokens) and self.tokens[following] == LEFT:
            self.funcall()
        else:
            name = self.match().value
            if name=='true':
                self.genIns('LIT',0,True)
            elif name=='false':
                self.genIns('LIT',0,False)
            else:
                sym = self.getSymbol(name)
                if sym.type=='CONST':
                    self.genIns('LIT',0,sym.value)
                else:
                    self.genIns('LOD',abs(self.level-sym.level),sym.addr)
    else:
        self.errorExpect('a value')
def level1(self):
    '''Parse `level2 { (ADD | SUB) level2 }`, emitting `OPR 2 ADD` / `OPR 2 SUB`.'''
    self.level2()
    while True:
        if self.isType('ADD'):
            operator = 'ADD'
        elif self.isType('SUB'):
            operator = 'SUB'
        else:
            return
        self.match()
        self.level2()
        self.genIns('OPR',2,operator)
def level2(self):
    '''Parse `level3 { (MUL | DIV | INTDIV | MOD) level3 }`.

    Left-associative; each operator emits `OPR 2 <operator>` once its right
    operand has been compiled.
    '''
    self.level3()
    operators = ('MUL','DIV','INTDIV','MOD')
    while True:
        found = None
        for candidate in operators:
            if self.isType(candidate):
                found = candidate
                break
        if found is None:
            return
        self.match()
        self.level3()
        self.genIns('OPR',2,found)
def level3(self):
    '''Parse `level4 [ POW level3 ]`.

    The right operand recurses into level3 itself, which makes the power
    operator right-associative.
    '''
    self.level4()
    if not self.isType('POW'):
        return
    self.match()
    self.level3()
    self.genIns('OPR',2,'POW')
def level4(self):
    '''Parse `item { FAC }`: every trailing factorial mark emits `OPR 1 FAC`.'''
    self.item()
    while True:
        if not self.isType('FAC'):
            break
        self.match()
        self.genIns('OPR',1,'FAC')
def interpret(self):
    '''Execute the instructions in `self.codes` on the virtual stack machine.

    Every frame on the data stack starts with three cells: static link,
    dynamic link (the caller's base) and return address.  `b` is the base
    of the current frame, `pc` the index of the next instruction and `regs`
    two scratch registers written by POP and read by PUSH.

    Execution stops as soon as `pc` is 0 after an instruction has run.

    Returns:
        The stack cells from index 3 up to and including the stack top.
    '''
    def base(stk,curLevel,levelDiff):
        '''Follow the static-link chain `levelDiff` times, starting at base `curLevel`.'''
        for i in range(levelDiff):
            curLevel = stk[curLevel]
        return curLevel
    stk = stack([0,0,0])  # frame header of the main program
    stk.top=2
    b = pc=0
    regs=[None,None]
    while 1:
        ins = self.codes[pc]
        pc+=1
        if ins.name=='INT':
            if ins.levelDiff==0: stk.top+=ins.addr-3 # allocate space
            elif ins.levelDiff==1: stk.top-=ins.addr  # rewind stack top back n spaces
            elif ins.levelDiff==2: # print the topmost `addr` cells, then drop them
                stk.top = stk.top-ins.addr+1
                for i in range(ins.addr):
                    print(stk[stk.top+i],end='')
                stk.top-=1
            else:self.errorIns(ins,pc-1)
        elif ins.name=='LIT':
            stk.push(ins.addr)
        elif ins.name=='STO':
            pos = base(stk,b,ins.levelDiff)+ins.addr
            stk[pos]= stk.pop()
        elif ins.name=='LOD':
            val = stk[base(stk,b,ins.levelDiff)+ins.addr]
            stk.push(val)
        elif ins.name=='MOV':
            stk[stk.top-ins.addr] = stk[stk.top-ins.levelDiff]
        elif ins.name=='JMP':
            pc = ins.addr
        elif ins.name=='JPC':
            if not stk.pop():
                pc = ins.addr
        elif ins.name=='CAL':
            # The static link is found by walking `levelDiff` frames up.
            # The code address (ins.addr) was used as the hop count before,
            # which sent every callee's static link to the outermost frame.
            stk.push(base(stk,b,ins.levelDiff)) # static link
            stk.push(b)  # dynamic link
            b = stk.top-1
            stk.push(pc) # return addr
            pc = ins.addr
        elif ins.name=='OPR':
            if ins.levelDiff==1:
                stk[stk.top] = self.unaryOPR[ins.addr](stk[stk.top])
            elif ins.levelDiff==2:
                arg2 = stk.pop()
                arg1 = stk[stk.top]
                stk[stk.top] = self.binaryOPR[ins.addr](arg1,arg2)
            else:self.errorIns(ins,pc-1)
        elif ins.name=='RET':
            pc = stk[b+2]
            # return address 0 marks the main frame: keep its cells for the result
            if pc!=0: stk.top=b-1
            b = stk[b+1]
        elif ins.name=='POP':
            regs[ins.addr] = stk.pop()
        elif ins.name=='PUSH':
            stk.push(regs[ins.addr])
        else:
            self.errorIns(ins,pc-1)
        if SHOWSTACK: print(str(pc).ljust(5),ins,stk[:stk.top+1])
        if pc==0:break
    return stk[3:stk.top+1]
def getCode(inStream):
    '''Read the lines of the next statement from `inStream`.

    Blank lines are skipped.  Reading stops after a line whose code part
    (the text before any `//` comment) ends with `.`, or at end of file.

    Returns:
        (lines, inStream): the collected lines, newlines included, and the
        stream that was passed in.

    Raises:
        EOFError: if end of file is reached before any line was collected.
    '''
    lines = []
    eof = False
    while 1:
        line = inStream.readline()
        if line=='':
            eof = True
            break
        if line.rstrip(' \n\r\t')=='': continue
        lines.append(line)
        p = line.find('//')
        # Only the code in front of a trailing comment decides whether the
        # statement is finished; `x:=1. // note` used to be missed.
        code = line if p==-1 else line[:p]
        if code.rstrip('\n\r \t').endswith('.'):break
    if eof and len(lines)==0: raise EOFError
    return lines,inStream
def testFromStdIO():
cal = PL0()
while 1:
sys.stdout.write('>> ')
sys.stdout.flush()
lines,sys.stdin = getCode(sys.stdin)
s = ''.join(lines)
tk =[i for i in gen_token(s)]
if tk==[]:continue
res = cal.parse(tk)
if res is not None: print(res)
def testFromFile(f):
cal = PL0()
with open(f,'r') as fp:
try:
while 1:
lines,fp = getCode(fp)
if len(lines)==1: print('>>',lines[0].strip('\n\r'))
else:
print('>> codes: ')
for i,l in enumerate(lines):
print(str(i+1).ljust(5),l,end='')
print()
tk =[i for i in gen_token(''.join(lines))]
if tk ==[]:continue
res = cal.parse(tk)
if res is not None: print(res)
except EOFError:
pass
if __name__ == '__main__':
    # Without a source file, fall back to the interactive prompt.
    if not FILE:
        testFromStdIO()
    else:
        testFromFile(FILE)

View File

View File

@ -0,0 +1,10 @@
// Data-stack picture for argument passing: one record node with two frame
// headers ({SL1|DL1|RA1} and {SL2|DL2|RA2}); each argument slot after the
// second header points back at a value slot listed before it.
digraph G{
nodesep=.05;
rankdir=LR;
node [shape=record,width=.1,height=.1];
// <v1>/<v2> and <a1>/<a2> are ports so that edges can attach to single cells
node0[label="{SL1|DL1|RA1}|...|<v1>val1|<v2>val2|{SL2|DL2|RA2}|<a1>arg1|<a2>arg2"]
node0:a1->node0:v1;
node0:a2->node0:v2;
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 55 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

View File

@ -0,0 +1,162 @@
# 1. PL0-compiler
> A compiler for c-like programming language **based on** PL0, which is a dynamic, strong typing language.
>小组成员: 朱河勤, 张世聪, 徐瑞, 詹慧悠
**分工**
* 朱河勤(整体框架设计与实现包括词法分析,语法分析代码生成等)
* 张世聪(do-while、switch语句的实现)
* 詹慧悠(do-while、switch语句的实现及实验报告撰写)
* 徐瑞(实现传值调用)
# 2. operations and features
![](features.jpg)
# 3. Grammar
```scala
program = body "."
body = {varDeclaration ";" | constDeclaration ";" | "func" ident "(" arg_list ")" body ";"} sentence
varDeclaration = "var" varIdent { "," varIdent}
varIdent = ident ["=" number] | ident { "[" number "]" }
constDeclaration = "const" ident "=" number {"," ident "=" number}
sentence = [ ident ":=" { ident ":=" } sentenceValue
| "begin" sentence { ";" sentence} "end"
| "if" sentenceValue "then" sentence {"elif" sentenceValue "then" sentence} ["else" sentence]
| "while" sentenceValue "do" sentence
| "break"
| "continue"
| ["return"] sentenceValue
| "print" "(" real_arg_list ")" ]
sentenceValue = condition
arg_list = ident { "," ident}
real_arg_list = sentenceValue {"," sentenceValue }
condition = condition_or [ "?" sentenceValue ":" sentenceValue ]
condition_or = condition_and { "||" condition_and }
condition_and = condition_not { "&&" condition_not }
condition_not = {"!"} condition_unit
condition_unit = ["odd"] expression
| expression ("<" | ">" | "<=" | ">=" | "=" | "!=") expression
expression = level1 { ("<<"| ">>" | "&" | "|") level1 }
level1 = level2 { ( "+" | "-" ) level2 }
level2 = level3 { ( "*" | "/" | "/%" | "%" ) level3 }
level3 = level4 {"^" level4}
level4 = item {"!"} (* factorial *)
item = number | ident [ "(" real_arg_list ")" ] | "(" sentenceValue ")" | ("+" | "-" | "~" ) item
```
# 4. Instruction generation
We designed several instructions that can be generated for the target machine.
To simplify this problem, we will emulate this virtual machine and execute instructions in python.
## 4.1. register
This machine has three registers:
* `b` is the base register that contains the base pointer used to locate a variable in the data stack
* `reg` is the return register that contains the return value of latest function call
* `pc` is the pc register that points to the instruction
## 4.2. stack
There are two stacks in this virtual machine.
One contains the instructions, visited by register `pc`. It won't change when executing instructions, so we can assume it's readonly
The other is the data stack. It changes dynamically while the program runs.
For each level, the first place holds the static link (`SL`), which is used to reach the variables of the enclosing level. The second place holds the dynamic link (`DL`), i.e. the base address of the calling level. The third place holds the return address (`RA`).
The other places in a level contain local variables and intermediate values of the calculation.
![](src/data_stack.jpg)
Each time we call a function, the level increases 1. Also, the level decreases 1 when we return from a function.
## 4.3. instruction
Every instruction consists of three parts. The first is the name of the instruction. Generally, the second is the level difference of an identifier (if it has one). And the third part is the address.
name | levelDiff | address | explanation
:-:|:-:|:-:|:-:
INT|-|n|allocate n space for one level
LIT|-|constant value| push a constant value to the top of the data stack
LOD | levelDiff|addr | load a variable's value onto the top of the data stack. The variable is located using levelDiff and addr
STO|levelDiff|addr| store the stack top value to a variable, top decreases.
CAL|levelDiff|addr|call a function
JMP |-|addr|jmp to addr, namely set addr to pc
JPC|-|addr| pop stack, if the value is not True, jmp addr
MOV|n1|n2| stk[top-n2] = stk[top-n1]
OPR |-| RET| return to the upper level, use current level's first three value to change pc, data stack, base register.
OPR | -|POP| pop the data stack, store the value in `reg` register
OPR|-|PUSH| push `reg` to stack top
OPR|n|BACK| rewind stk.top backward n steps
OPR|-|operator type| various operations on values
# 5. Design
We can generate instruction when analysing grammar.
The key points are how the control structures are translated into instructions.
## 5.1. if elif else
![](../src/elseif_ins_stack.jpg)
## 5.2. while/break
![](../src/while_ins_stack.jpg)
`continue`, `for` can be translated in the same way.
## 5.3. function arguments pass
When analysing the function's definition, we can store the formal arguments as the function's local variables.
As soon as we call this function, we should calculate the real arguments in the level above the function, and then pass the values to the function's formal variables one by one.
I use an instruction `MOV` to achieve this goal. `MOV n1, n2` will store the value stk[top-n1] in stk[top-n2].
Let's have a look at how to call a function and pass args value.
Before we call a function, its real args will be calculated in the level above this function. Note that the function's level is n+1, and we call this function in level n.
In level n, we calculate the function's args; all values are stored in the data stack of level n. Now we call the function and enter it. The data stack reaches level n+1 and grows three spaces for `SL`, `DL`, `RA`. The following spaces are for the function's local variables. So we can move level n's real arg values to these places according to the function's argument number and variable number.
For example, if the function has n1 args and n2 local variables (excluding args), then
```python
for i in [0,1..,n1-1]:
mov , n2+n1+3+i, n2 + i
```
The moment we return to level n, we should rewind top by n1 spaces; `OPR,n1,'BACK'` does this.
![](../src/argument_pass.jpg)
## 5.4. function return
Also, mark function level as n+1, and outer(upper) is level n.
To implement `return` sentence, we just need to do two things:
* calculate `return` sentence value **in level n+1**
* pass this value to level n
It seems hard to pass level n+1's value to level n: once we have returned to level n, level n+1's data in the data stack will be cleared.
I use an extra register `reg` to achieve this. Before we return,
* calculate return value
* `OPR ,0,'POP'` will pop the value and store it in reg
* return to level n
* `OPR,0,'PUSH'` will push reg value to stack top
Now the return value has been passed from level n+1 to level n.
## 5.5. instruction fillback
Taking the `while` block as an example, note that we don't know the `JPC` instruction's target address until we finish analysing the whole block. The solution is that after we analyse the while condition, we generate an instruction with no target address, which just takes up a place, and note down this instruction's address. As soon as we finish analysing the whole `while` block, the instruction pointer, namely `ip`, points to the target address of `JPC`. Then we fill back the `JPC` instruction with the target address taken from `ip`.
## 5.6. symbol table
When analysing and translating, we want to get a symbol, which includes its level and address (or its value, for a constant), according to its name. The following shows how to achieve this elegantly.
There are three types of symbols:
* constant
* variable
* function name
Every function has an environment that contains this level's symbols, and an outer environment(except main function). Every environment has the three symbols mentioned above.
By default, we are in the main function at the beginning of the program.
In an environment, when we meet a symbol, we should look it up in the current environment. If it is not found, we search the outer environments recursively until we find it.
This guarantees that no environment uses the same name for different symbols, while different environments may reuse the same names.
So there won't be conflicts when different functions have the same local variables or arguments.
I create class `closure` to describe this kind of environment and variable `curClosure` to mark the current environment. Every time we call a function, we enter a more inner environment. We do the following things to make sure that the environment changes correctly.
```python
saved = curClosure
curClosure = function.closure
call function
curClosure = saved
```

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,19 @@
// How an if / elseif / else chain is laid out in the instruction sequence:
// the left record lists the generated instructions with their jump targets,
// the right record the source sentences they come from.
digraph G {
    compound =true
    nodesep=.05;
    rankdir=LR;
    node [shape=record,width=.1,height=.1];
    subgraph cluster_stack{
        label = "instructions stack"
        node0 [label = "<f0>cond1 |<f1> JPC|<f2>expr1 |<f3> JMP|<f4>cond2 |<f5> JPC|<f6>expr2 | <f7>JMP|<f8> expr3|<f9> ...",height=2.5];
        // each JPC jumps to the next condition/branch, each JMP to the end
        node0:f1 -> node0:f4;
        node0:f3 -> node0:f9;
        node0:f5 -> node0:f8;
        node0:f7 -> node0:f9;
    }
    subgraph cluster_elseif{
        label = "program sentences"
        node1 [label = "<f0>if cond1 |<f1>then expr1|<f2>elseif cond2 |<f3> then expr2|<f4>else expr3| ...",height=2.5];
    }
    // Invisible edge that only orders the two clusters.  ltail must name the
    // cluster containing the tail node (node1) and lhead the cluster
    // containing the head node (node0); they were swapped.
    node1 -> node0 [color ="white" ,ltail=cluster_elseif, lhead=cluster_stack];
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 66 KiB

View File

@ -0,0 +1,26 @@
// Function return in four snapshots of the data stack and the `reg`
// register: the return value is popped into reg (POP), the callee frame is
// dropped (RET) and the value is pushed onto the caller's stack (PUSH).
digraph G{
    // lhead/ltail on the edges below only take effect with compound=true
    compound=true;
    nodesep=.05;
    rankdir=LR;
    node [shape=record,width=.1,height=.1];
    subgraph cluster_1{
        node1[label="{SL|DL|RA}|...|{SL2|DL2|RA2}|...|ret-val"]
        reg1[label="reg: ",shape=ellipse]
    }
    subgraph cluster_2{
        node2[label="{SL|DL|RA}|...|{SL2|DL2|RA2}|..."]
        reg2[label="reg: ret-val",shape=ellipse]
    }
    subgraph cluster_3{
        node3[label="{SL|DL|RA}|..."]
        reg3[label="reg: ret-val",shape=ellipse]
    }
    subgraph cluster_4{
        node4[label="{SL|DL|RA}|...|ret-val"]
        // separate id: a node id shared with cluster_3 would be one single
        // node and could not appear in both snapshots
        reg4[label="reg: ret-val",shape=ellipse]
    }
    node1 -> node2 [color="red",label="POP",ltail=cluster_1, lhead=cluster_2];
    node2 -> node3 [color="red",label="RET",ltail=cluster_2, lhead=cluster_3];
    node3 -> node4 [color="red",label="PUSH",ltail=cluster_3, lhead=cluster_4];
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

View File

@ -0,0 +1,17 @@
// How a while loop containing a break is laid out in the instruction
// sequence: source sentences on one side, generated instructions with their
// jump targets on the other.
digraph G {
rankdir=LR;
node [shape=record,width=.1,height=.1];
subgraph cluster_elseif{
label = "program sentences"
node1 [label = "<f0>while cond |<f1>...|<f2>break |<f3> ...|<f4>outer while",height=2.5];
}
subgraph cluster_stack{
label = "instructions stack"
node0 [label = "<f0>cond |<f1> JPC|<f2>... |<f3> JMP|<f4>...|<f5> JMP|<f6> outer while",height=2.5];
// the JPC and the first JMP leave the loop, the last JMP goes back to the condition
node0:f1 -> node0:f6;
node0:f3 -> node0:f6;
node0:f5 -> node0:f0;
}
// `break` is translated into the first JMP
node1:f2->node0:f3 [color=red]
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 72 KiB

View File

@ -0,0 +1,11 @@
import unittest
from token_scanner import gen_token
class TestTokenScanner(unittest.TestCase):
def Test_gen_token(self):
li = [i for i in gen_token('int a;')]
ans = [Token('NAME','int',1),Token('NAME','a',1),Token('SEMICOLON',';',1)]
self.assertEqual(li,ans)
if __name__ == '__main__':
    # run the tests of this module when it is executed as a script
    unittest.main()

View File

@ -0,0 +1,16 @@
// Prints fib(n) for n = 1 .. 14 using a recursive function.
func fib(n)
begin
// NOTE(review): this `if` has no `then`, unlike the `if` sentences of the other samples - confirm that this is intended.
if n=1 || n=2 return 1;
return fib(n-1) +fib(n-2);
end ;
var n=1;
begin
while n<15 do
begin
print ('The ',n,'th fib item is:',fib(n));
n :=n+1;
end;
end
.

View File

@ -0,0 +1,13 @@
// Name-scope sample: the global `a` and the parameter `a` of foo and bar
// share one name; every function prints the `a` it sees.
var a =1;
func foo(a)
print("[in function foo ] a=%d",a);
func bar(a)
begin
print("[in function bar ] a=%d",a);
foo(4);
end;
begin
print("[in function main] a=%d",a);
foo(2);
bar(3)
end.

View File

@ -0,0 +1,11 @@
// do-while sample: the body runs first, then the condition n<30 is tested.
var n=1;
begin
do
begin
print('%d',n);
n:=n+1;
end
while n<30;
print('zhqnb')
end
.

View File

@ -0,0 +1,17 @@
// expression samples: every statement below ends with '.' and is read on its own; `(1+2.` is malformed, presumably on purpose
var a=3,b=2,c;.
c:=a+1.
begin c; c+1!=1 ; c+1=5 end.
for(;b>=0;b:=b-1) print('random(100): %d',random(100)) .
begin ++1--1; 1<<2+3%2; 2&1 end.
-1+2*3/%2.
(1+2.
4!!.
if 0 then 1
elif 1>2 then 2
elif false then 3
else 4.

View File

@ -0,0 +1,12 @@
// Recursive factorial: f(1) = 1, f(n) = n * f(n-1); prints f(10).
func f(n)
begin
if n=1 then return 1;
return n*f(n-1);
end;
var a;
begin
a:=f(10);
print('factorial 10 is %d',a);
end
.

View File

@ -0,0 +1,16 @@
// Prints fib(n) for n = 1 .. 14 using a recursive function.
func fib(n)
begin
if n=1 || n=2 then return 1;
return fib(n-1)+fib(n-2);
end ;
var n=1;
begin
while n<15 do
begin
print('fib[%d]=%d',n,fib(n));
n :=n+1;
end;
end
.

View File

@ -0,0 +1,38 @@
// A larger sample program: constants, variables, two functions, loops with
// break/continue, chained assignment and the conditional expression.
const n=3;
var r,x,n16;
// multiply(a,b): adds a to c whenever b is odd, then doubles a and halves b
func multiply(a,b)
var c,d;
begin
c:=0;
while 1 do
begin
if b<=0 then break;
if odd b then c:= c+a;
a:=2 * a; b:=b/%2;
end;
return c;
end;
// gcd(f,g): repeatedly subtracts the smaller value from the larger one until both are equal
func gcd(f,g)
begin
for(;f!=g;)
begin
if f<g then g:=g-f
elif g<f then f:= f-g
else continue;
end;
return g;
end;
begin
if 0 then 1
elif 0 then 1
else r:=n16:=3! >5 ? 2^4:1 ;
x:=multiply(4,n);
r:=gcd(multiply(4,n),multiply(1,n16)); ; ;
print('r=%d,x=%d,n16=%d ',r,x,n16)
end.

View File

@ -0,0 +1,10 @@
// switch sample: n runs from -2 to 2 and every case calls f(n), which
// prints n together with n*n.
func f(n)
print('squre of %d is %d',n,n*n);
var n=-2;
while n<3 do begin
switch n
case 0:f(n)
case 1,-1:f(n)
case 2,0-2:f(n) ;
n:=n+1;
end.

View File

@ -0,0 +1,108 @@
'''
#########################################################################
# File : token_scanner.py
# Author: mbinary
# Mail: zhuheqin1@gmail.com
# Blog: https://mbinary.xyz
# Github: https://github.com/mbinary
# Created Time: 2018-09-17 22:20
# Description:
#########################################################################
'''
import re
# One named regex group per token type; the group name becomes Token.type.
STR = r'[\'\"](?P<STR>.*?)[\'\"]' # not completely correct yet
NAME = r'(?P<NAME>[a-zA-Z_][a-zA-Z_0-9]*)'
NUM = r'(?P<NUM>\d*\.\d+|\d+)' # note that don't use \d+|\d*\.\d+
ASSIGN = r'(?P<ASSIGN>\:\=)'
# ODD = r'(?P<ODD>odd )'
EQ = r'(?P<EQ>=)'
NEQ = r'(?P<NEQ>!=)'
GT = r'(?P<GT>\>)'
LT = r'(?P<LT>\<)'
GE = r'(?P<GE>\>\=)'
LE = r'(?P<LE>\<\=)'
BITNOT = r'(?P<BITNOT>\~)'
BITOR = r'(?P<BITOR>\|)'
BITAND = r'(?P<BITAND>\&)'
RSHIFT = r'(?P<RSHIFT>\>\>)'
LSHIFT = r'(?P<LSHIFT>\<\<)'
AND = r'(?P<AND>\&\&)'
NOT = r'(?P<NOT>\!)'
OR = r'(?P<OR>\|\|)'
ADD = r'(?P<ADD>\+)'
SUB=r'(?P<SUB>\-)'
MUL = r'(?P<MUL>\*)'
INTDIV = r'(?P<INTDIV>\/\%)'
MOD = r'(?P<MOD>\%)'
DIV = r'(?P<DIV>\/)'
POW = r'(?P<POW>\^)'
# NOTE(review): FAC and NOT match the same text; FAC comes first in `li`, so
# the alternation below never produces a NOT token for `!`.
FAC=r'(?P<FAC>\!)' #factorial
COLON = r'(?P<COLON>\:)'
COMMA = r'(?P<COMMA>\,)'
SEMICOLON = r'(?P<SEMICOLON>\;)'
PERIOD = r'(?P<PERIOD>\.)'
QUESTION = r'(?P<QUESTION>\?)'
LEFT=r'(?P<LEFT>\()'
RIGHT=r'(?P<RIGHT>\))'
WS = r'(?P<WS>\s+)'
# COMMENT is not part of `li`; gen_token removes comments itself before scanning.
COMMENT = r'(?P<COMMENT>//[^\r\n]*|/\*.*?\*/)'
# The alternation tries the patterns in list order, so a longer operator has
# to come before any operator that is its prefix.
# note that lt,gt should be after le,ge and rshift, lshift
li = [STR,NUM, AND,OR,BITAND,BITOR,BITNOT,RSHIFT,LSHIFT,
EQ,NEQ,GE,LE,LT,GT,\
SUB,MOD, ADD, MUL,INTDIV,DIV, POW,FAC,NOT,\
COMMA,SEMICOLON,PERIOD, QUESTION,WS,LEFT,RIGHT,\
ASSIGN,COLON,NAME] # COLON behind ASSIGN
master_pat = re.compile('|'.join(li),re.DOTALL)
class Token:
    '''A lexical token: its type, its text and the line it was found on.'''
    def __init__(self,tp,value,lineNum=None):
        self.type = tp
        self.value= value
        self.lineNum = lineNum
    def __eq__(self,tk):
        '''Tokens are equal when type and value match; the line number is ignored.'''
        try:
            return self.type==tk.type and self.value==tk.value
        except AttributeError:
            # Not token-like (e.g. None): let Python fall back to its
            # default comparison instead of raising.
            return NotImplemented
    def __repr__(self):
        s = self.value if self.type!='STR' else '"{}"'.format(repr(self.value))
        return '({},{},{})'.format(self.type,s,self.lineNum)
def _strip_comments(line, inBlock):
    '''Remove `//` and `/* */` comments from one line.

    `inBlock` tells whether the line starts inside a block comment.
    Returns (code, inBlock): the remaining code and whether a block comment
    is still open at the end of the line.
    '''
    kept = []
    pos = 0
    while pos < len(line):
        if inBlock:
            end = line.find('*/', pos)
            if end == -1:
                break  # the comment continues on the next line
            pos = end + 2
            inBlock = False
            continue
        lineCmt = line.find('//', pos)
        blockCmt = line.find('/*', pos)
        if blockCmt != -1 and (lineCmt == -1 or blockCmt < lineCmt):
            kept.append(line[pos:blockCmt])
            kept.append(' ')  # a comment separates tokens like whitespace
            pos = blockCmt + 2
            inBlock = True
        else:
            kept.append(line[pos:] if lineCmt == -1 else line[pos:lineCmt])
            break
    return ''.join(kept), inBlock

def gen_token(text):
    '''Yield the tokens of `text`, skipping whitespace and comments.

    Each token carries its 1-based line number.  Block comments may span
    several lines or end on the line they started on; in the latter case
    the code after `*/` is scanned too (it used to be dropped, with all
    following lines treated as comment).

    Comment markers inside string literals are not recognised as such.
    A token whose value is `exit` terminates the program.
    '''
    inBlock = False
    for lineNum, line in enumerate(text.split('\n'), 1):
        s, inBlock = _strip_comments(line.lstrip(), inBlock)
        if s == '':
            continue
        scanner = master_pat.scanner(s)
        for m in iter(scanner.match, None):
            tok = Token(m.lastgroup, m.groupdict()[m.lastgroup], lineNum)
            if tok.value == 'exit': exit()
            if tok.type != 'WS' and tok.type != 'COMMENT':
                yield tok
if __name__ == '__main__':
    # tiny REPL: print every token of each line typed in
    while True:
        for token in gen_token(input('>> ')):
            print(token)

View File

@ -0,0 +1,30 @@
module Calculator where

-- calculator, integers, operators: +-*/
-- "2 / 2 + 3 * 4 - 13" == 0
-- "4 + 3 * 4 / 3 - 6 / 3 * 3 + 8" == 10

-- <grammar>
-- expr -> factor | expr {+|-} factor
-- factor -> num | factor {*|/} num

-- | Evaluate an expression; blanks are removed before parsing.
evaluate :: String -> Double
evaluate input = expr (factor (getNum (filter (/= ' ') input)))

-- | Split the leading run of digits off the input and read it as a number.
getNum "" = (0,"")
getNum s = (read digits :: Double, rest)
  where
    (digits, rest) = span isDigitChar s
    isDigitChar c = '0' <= c && c <= '9'

-- | Fold the following run of * and / operations into the accumulator;
--   stops at the end of the input or in front of + or -.
factor (x, "") = (x, "")
factor (x, s@(op:rest))
  | op == '+' || op == '-' = (x, s)
  | otherwise = factor (apply op, s2)
  where
    (y, s2) = getNum rest
    apply '*' = x * y
    apply _ = x / y

-- | Fold the following + and - operations into the accumulator; the right
--   operand of each one is a complete factor.
expr (x, "") = x
expr (x, op:rest) = expr (total, s2)
  where
    (y, s2) = factor (getNum rest)
    total
      | op == '+' = x + y
      | otherwise = x - y

View File

@ -0,0 +1,32 @@
from random import randint
def genOp(li):
    '''Return one element of the sequence `li`, chosen at random.'''
    position = randint(0, len(li) - 1)
    return li[position]
def genNum(n=20):
    '''Return a random integer between 1 and `n`, both included.'''
    number = randint(1, n)
    return number
def genFactor(n=3):
    '''Build a random factor: a number followed by 1..n `*`/`/` operations.

    The result has no blanks, e.g. `3*7/2`.
    '''
    count = randint(1, n)
    text = str(genNum())
    for _ in range(count):
        text += genOp('*/')
        text += str(genNum())
    return text
def genExpr(n=8):
    '''Build a random expression: a factor followed by 3..n `+`/`-` factors.

    Factors and operators are separated by single blanks.
    '''
    count = randint(3, n)
    parts = [genFactor()]
    for _ in range(count):
        parts += [genOp('+-'), genFactor()]
    return ' '.join(parts)
if __name__ == '__main__':
    # print one random expression together with the value Python computes for it
    expression = genExpr()
    print('evaluate "{}" == {}'.format(expression, eval(expression)))

View File

@ -0,0 +1,98 @@
# C-parser
>A token\_scanner and declaration parser for simplified c using LL(1)
# Rules
* the size of an int or a pointer is 1 byte
# Grammar
```scala
translation_unit
: declaration
| translation_unit declaration
;
declaration
: declaration_specifiers init_declarator_list ';'
;
declaration_specifiers
: type_specifier
;
init_declarator_list
: init_declarator
| init_declarator_list ',' init_declarator
;
init_declarator
: declarator
;
type_specifier
: VOID
| INT
;
declarator
: pointer direct_declarator
| direct_declarator
;
direct_declarator
: IDENTIFIER
| '(' declarator ')'
| direct_declarator '[' CONSTANT_INT ']'
| direct_declarator '(' parameter_type_list ')'
| direct_declarator '(' ')'
;
pointer
: '*'
| '*' pointer
;
parameter_type_list
: parameter_list
;
parameter_list
: parameter_declaration
| parameter_list ',' parameter_declaration
;
parameter_declaration
: declaration_specifiers declarator
;
```
# Examples
```c
>> int *p,q,j[2];
p::pointer(int)
q::int
j::array(2,int)
>> int *p[2][3];
p::array(2,array(3,pointer(int)))
>> int (*p[4])[2];
p::array(4,pointer(array(2,int)))
>> int (*f(int i,void *j))[2];
f::function( i::int X j::pointer(void) => pointer(array(2,int)))
>> int f(void i, void j, int p[2]);
f::function( i::void X j::void X p::array(2,int) => int)
>> int *f(int i)[2];
[Error]: Array of Functions is not allowed
>> int f[2](int k);
[Error]: Array of Function can not be returned from functions
>> void (*(*paa)[10])(int a);
paa::pointer(array(10,pointer(function( a::int => void))))
>> int (*(*(*pg())(int x))[20])(int *y);
pg::function( void => pointer(function( x::int => pointer(array(20,pointer(function( y::pointer(int) => int)))))))
>> int (*p(int * s,int (*t)(int *m, int n, int (*l())[20]),int k[10]))[10][20];
p::function( s::pointer(int) X t::pointer(function( m::pointer(int) X n::int X l::function( void => pointer(array(20,int))) => int)) X k::array(10,int) => pointer(array(10,array(20,int))))
```

View File

@ -0,0 +1,155 @@
'''
#########################################################################
# File : parser.py
# Author: mbinary
# Mail: zhuheqin1@gmail.com
# Blog: https://mbinary.xyz
# Github: https://github.com/mbinary
# Created Time: 2018-09-17 22:19
# Description:
#########################################################################
'''
import argparse
from token_scanner import gen_token
# Command-line options; they are parsed as soon as this module is loaded.
argp = argparse.ArgumentParser()
argp.add_argument('-c','--code',help="output generated code", action='store_true')
args = argp.parse_args()
# True when -c/--code was given.
# NOTE(review): CODE does not appear to be read anywhere in this module - confirm.
CODE = args.code
class parser(object):
    '''Base class of the recursive-descent parsers.

    It holds the token list and the read position and offers the matching
    helpers; subclasses implement `statement`.
    '''
    def __init__(self):
        self.tokens =None  # list of tokens being parsed
        self.n = 0         # number of tokens
        self.i= 0          # index of the current token
    def isType(self,s):
        '''True if a current token exists and has type `s`.'''
        return self.i<self.n and self.tokens[self.i].type==s
    def isEnd(self):
        '''True once every token has been consumed.'''
        return self.i>=self.n
    def match(self,tp=None):
        '''Consume and return the current token.

        With `tp` given, the token must have that type.

        Raises:
            Exception: on a type mismatch, or when the input has ended
                (this used to surface as a bare IndexError).
        '''
        if self.i>=self.n:
            raise Exception('[parse error] Expect {}, got end of input'.format(tp))
        cur = self.tokens[self.i]
        #print(self.i,tp,cur)
        if tp is None or cur.type==tp:
            self.i+=1
            return cur
        raise Exception('[parse error] Expect {}, got {}'.format(tp,cur.type))
    def parse(self,tokens):
        '''Parse `tokens` from the start; errors are printed, not raised.'''
        self.tokens=tokens
        self.i = 0
        self.n = len(self.tokens)
        try:
            self.statement()
            if self.i<self.n:
                print('[parse error] invalid statement')
        except Exception as e:
            print(e)
    def statement(self):
        '''Entry point of the grammar; overridden by subclasses.'''
        pass
class declarationParser(parser):
    '''Parser for simplified C declarations.

    Each declared name is turned into a description such as
    `p::array(4,pointer(array(2,int)))`, which `statement` prints.
    '''
    type_size = {'INT':1,'POINTER':1,'VOID':1}
    def statement(self):
        '''non-terminal symbol: translation_unit'''
        while self.i<self.n:
            for i in self.declaration():
                print(i)
    def declaration(self):
        '''declaration_specifiers init_declarator_list ';' -> list of descriptions'''
        symType = self.declaration_specifiers()
        li = self.init_declarator_list(symType)
        self.match('SEMICOLON')
        return li
    def declaration_specifiers(self):
        return self.type_specifier()
    def type_specifier(self):
        '''Consume the base type (`int` or `void`) and return its text.

        Raises:
            Exception: if the current token is not a type keyword; any token
                used to be accepted as a type.
        '''
        tok = self.match()
        if tok.type not in ('INT','VOID'):
            raise Exception('[parse error] Expect a type specifier (int or void), got {}'.format(tok.type))
        return tok.value
    def init_declarator_list(self,symType):
        '''One or more declarators separated by commas, all sharing `symType`.'''
        li = []
        while 1:
            li.append(self.init_declarator(symType))
            if self.isType('COMMA'):
                self.match()
            else:break
        return li
    def init_declarator(self,symType):
        return self.declarator(symType)
    def declarator(self,symType):
        '''[pointer] direct_declarator: wrap `symType` in one pointer(...) per `*`.'''
        np = self.pointer() # np>=0
        tp = 'pointer(' *np + symType + ')'*np
        return self.direct_declarator(tp)
    def direct_declarator(self,tp):
        '''Parse a direct declarator and return `name::type`.

        `tp` is the type built so far.  A parenthesised inner declarator is
        parsed with the placeholder `$`, which is replaced by the final
        type once the array or function suffix has been read.

        Raises:
            Exception: for a function returning an array, an array of
                functions, or a malformed array size.
        '''
        args =''
        inner = '$'
        name = ''
        if self.isType('NAME'):
            name = self.match().value+'::'
        elif self.isType('LEFT'): # (
            self.match()
            inner = self.declarator('$') #mark
            self.match('RIGHT')
        if self.isType('LEFT'):
            # function suffix: '(' [parameter_type_list] ')'
            self.match()
            li = ['void']
            if not self.isType('RIGHT'):
                li = self.parameter_type_list()
            self.match('RIGHT')
            if self.isType('L2'):
                # `f(...)[n]` declares a function whose result is an array;
                # the two error messages were swapped before.
                raise Exception('[Error]: Functions can not return arrays')
            args = ' X '.join(li)
        elif self.isType('L2'):
            # array suffix: one or more '[' NUM ']'
            li = []
            while self.isType('L2'):
                self.match()
                # match reports a readable parse error where the bare
                # assert produced an empty message
                li.append(int(self.match('NUM').value))
                self.match('R2')
            if self.isType('LEFT'):
                # `a[n](...)` declares an array whose elements are functions
                raise Exception('[Error]: Array of functions is not allowed')
            for i in reversed(li):
                tp = 'array({},{})'.format(i,tp)
        if args!='':
            tp = 'function( {args} => {tp})'.format(args=args,tp =tp )
        return name+inner.replace('$',tp)
    def pointer(self):
        '''Consume consecutive `*` tokens and return how many there were.'''
        n = 0
        while self.isType('POINTER'):
            n+=1
            self.match('POINTER')
        return n
    def parameter_type_list(self):
        return self.parameter_list()
    def parameter_list(self):
        '''One or more parameter declarations separated by commas.'''
        li = []
        while 1:
            argType = self.parameter_declaration()
            li.append(argType)
            if self.isType('COMMA'):
                self.match()
            else:break
        return li
    def parameter_declaration(self):
        '''declaration_specifiers declarator'''
        symType = self.declaration_specifiers()
        return self.declarator(symType)
def testFromStdIO():
dp = declarationParser()
while 1:
s = input('>> ')
tk = [i for i in gen_token(s)]
dp.parse(tk)
def testFromFile(f= 'test.txt'):
dp = declarationParser()
with open(f,'r') as fp:
for line in fp:
line = line.strip(' \n')
if line.startswith('//') or line=='' :continue
print('>>',line)
tk =[i for i in gen_token(line)]
dp.parse(tk)
print()
if __name__ == '__main__':
    # replay the bundled examples first, then hand over to the prompt
    testFromFile()
    testFromStdIO()

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 72 KiB

View File

@ -0,0 +1,11 @@
int *p,q,j[2];
int *p[2][3];
int (*p[4])[2];
int (*f(int i,void *j))[2];
int f(void i, void j, int p[2]);
// wrong: the next two declarations are invalid and are expected to be reported as errors
int *f(int i)[2];
int f[2](int k);
void (*(*paa)[10])(int a);
int (*(*(*pg())(int x))[20])(int *y);
int (*p(int * s,int (*t)(int *m, int n, int (*l())[20]),int k[10]))[10][20];

View File

@ -0,0 +1,47 @@
'''
#########################################################################
# File : token_scanner.py
# Author: mbinary
# Mail: zhuheqin1@gmail.com
# Blog: https://mbinary.xyz
# Github: https://github.com/mbinary
# Created Time: 2018-11-01 12:58
# Description:
#########################################################################
'''
import re
# One named regex group per token type; the group name becomes Token.type.
NAME = r'(?P<NAME>[a-zA-Z_][a-zA-Z_0-9]*)'
NUM = r'(?P<NUM>\d*\.\d+|\d+)' # note that don't use \d+|\d*\.\d+
POINTER = r'(?P<POINTER>\*)'
COMMA = r'(?P<COMMA>\,)'
SEMICOLON = r'(?P<SEMICOLON>\;)'
# The keywords end with \b so that identifiers which merely start with a
# keyword (`integer`, `voidp`) are scanned as one NAME, not keyword + NAME.
VOID=r'(?P<VOID>void\b)'
INT = r'(?P<INT>int\b)'
LEFT=r'(?P<LEFT>\()'
RIGHT=r'(?P<RIGHT>\))'
L2 = r'(?P<L2>\[)'
R2 = r'(?P<R2>\])'
WS = r'(?P<WS>\s+)'
# COMMENT is defined but not part of master_pat below.
COMMENT = r'(?P<COMMENT>//[^\r\n]*|/\*.*?\*/)'
# The alternation tries the patterns in order: keywords must precede NAME.
master_pat = re.compile('|'.join([LEFT,RIGHT,L2,R2,POINTER,COMMA,SEMICOLON,INT,VOID,NUM, WS,NAME]),re.DOTALL)
class Token:
    '''A lexical token made of its type and the matched text.'''
    def __init__(self,tp,val):
        self.type, self.value = tp, val
    def __repr__(self):
        return '({kind},"{text}")'.format(kind=self.type,text=self.value)
def gen_token(text):
    '''Yield the tokens of `text` in order, leaving out whitespace.

    Scanning stops at the first position where no pattern matches.
    '''
    scanner = master_pat.scanner(text)
    while True:
        m = scanner.match()
        if m is None:
            return
        if m.lastgroup=='WS':
            continue
        yield Token(m.lastgroup,m.group())
if __name__ == '__main__':
    # tiny REPL: print every token of each line typed in
    while True:
        for token in gen_token(input('>> ')):
            print(token)

Binary file not shown.