'''
#########################################################################
# File : token_scanner.py
# Author: mbinary
# Mail: zhuheqin1@gmail.com
# Blog: https://mbinary.xyz
# Github: https://github.com/mbinary
# Created Time: 2018-09-17 22:20
# Description: regex-based scanner that turns source text into Token
#              objects (type, value, line number).
#########################################################################
'''
import re

# Every pattern wraps its lexeme in a named group; the group name becomes the
# token type (read back through ``match.lastgroup``).

# A string literal must be closed by the same quote character that opened it;
# the QUOTE group exists only to support that backreference.  Escaped quotes
# inside a literal are not supported.
STR = r'(?P<QUOTE>[\'\"])(?P<STR>.*?)(?P=QUOTE)'
NAME = r'(?P<NAME>[a-zA-Z_][a-zA-Z_0-9]*)'
# The decimal alternative must come first: with \d+|\d*\.\d+ the integer part
# of "3.5" would match on its own and leave ".5" behind.
NUM = r'(?P<NUM>\d*\.\d+|\d+)'
ASSIGN = r'(?P<ASSIGN>\:\=)'
# ODD = r'(?P<ODD>odd )'

EQ = r'(?P<EQ>=)'
NEQ = r'(?P<NEQ>!=)'
GT = r'(?P<GT>\>)'
LT = r'(?P<LT>\<)'
GE = r'(?P<GE>\>\=)'
LE = r'(?P<LE>\<\=)'

BITNOT = r'(?P<BITNOT>\~)'
BITOR = r'(?P<BITOR>\|)'
BITAND = r'(?P<BITAND>\&)'
RSHIFT = r'(?P<RSHIFT>\>\>)'
LSHIFT = r'(?P<LSHIFT>\<\<)'

AND = r'(?P<AND>\&\&)'
# NOT and FAC share one pattern.  FAC is listed first in ``li``, so '!' is
# always reported as FAC and the NOT alternative never wins.
NOT = r'(?P<NOT>\!)'
OR = r'(?P<OR>\|\|)'

ADD = r'(?P<ADD>\+)'
SUB = r'(?P<SUB>\-)'
MUL = r'(?P<MUL>\*)'
INTDIV = r'(?P<INTDIV>\/\%)'
MOD = r'(?P<MOD>\%)'
DIV = r'(?P<DIV>\/)'
POW = r'(?P<POW>\^)'
FAC = r'(?P<FAC>\!)'  # factorial

COLON = r'(?P<COLON>\:)'
COMMA = r'(?P<COMMA>\,)'
SEMICOLON = r'(?P<SEMICOLON>\;)'
PERIOD = r'(?P<PERIOD>\.)'
QUESTION = r'(?P<QUESTION>\?)'
LEFT = r'(?P<LEFT>\()'
RIGHT = r'(?P<RIGHT>\))'
WS = r'(?P<WS>\s+)'

# Not part of ``li``: gen_token recognises comments itself so that it can
# carry an open block comment from one line to the next.
COMMENT = r'(?P<COMMENT>//[^\r\n]*|/\*.*?\*/)'

# Alternation order matters, because the first alternative that matches wins:
#   * LT, GT come after LE, GE and RSHIFT, LSHIFT
#   * BITAND, BITOR come after AND, OR
#   * DIV comes after INTDIV
#   * COLON comes after ASSIGN
li = [STR, NUM, AND, OR, BITAND, BITOR, BITNOT, RSHIFT, LSHIFT,
      EQ, NEQ, GE, LE, LT, GT,
      SUB, MOD, ADD, MUL, INTDIV, DIV, POW, FAC, NOT,
      COMMA, SEMICOLON, PERIOD, QUESTION, WS, LEFT, RIGHT,
      ASSIGN, COLON, NAME]

master_pat = re.compile('|'.join(li), re.DOTALL)


class Token:
    '''A lexical token: its type name, its text, and the line it came from.'''

    def __init__(self, tp, value, lineNum=None):
        self.type = tp
        self.value = value
        self.lineNum = lineNum

    def __eq__(self, tk):
        # Tokens are equal when type and value agree; the line number is
        # deliberately left out of the comparison.
        if not isinstance(tk, Token):
            return NotImplemented
        return self.type == tk.type and self.value == tk.value

    def __hash__(self):
        # Kept consistent with __eq__: the line number is not hashed.
        return hash((self.type, self.value))

    def __repr__(self):
        s = self.value if self.type != 'STR' else '"{}"'.format(repr(self.value))
        return '({},{},{})'.format(self.type, s, self.lineNum)


def gen_token(text):
    '''Yield the Token objects found in ``text``.

    The text is scanned one line at a time and every token carries its
    1-based line number.  Whitespace is dropped.  A ``//`` comment runs to
    the end of its line, and a ``/* ... */`` comment may span several lines;
    comment markers inside a string literal are part of the string.
    A character that matches no token pattern ends scanning of the current
    line, and scanning resumes on the next line.
    '''
    in_comment = False  # True while inside an unterminated /* ... */
    for line_num, line in enumerate(text.split('\n'), start=1):
        pos = 0
        end = len(line)
        while pos < end:
            if in_comment:
                close = line.find('*/', pos)
                if close == -1:
                    break  # the comment swallows the rest of this line
                in_comment = False
                pos = close + 2
                continue
            if line.startswith('//', pos):
                break
            if line.startswith('/*', pos):
                in_comment = True
                pos += 2
                continue
            m = master_pat.match(line, pos)
            if m is None or m.end() == pos:
                # Nothing matched here (or no progress could be made).
                break
            pos = m.end()
            if m.lastgroup != 'WS':
                yield Token(m.lastgroup, m.group(m.lastgroup), line_num)


if __name__ == '__main__':
    # Small interactive loop: print the tokens of each line that is entered.
    while True:
        try:
            expr = input('>> ')
        except (EOFError, KeyboardInterrupt):
            break
        for i in gen_token(expr):
            print(i)