diff options
Diffstat (limited to 'intern/python/modules/simpleparse/bootstrap.py')
-rw-r--r-- | intern/python/modules/simpleparse/bootstrap.py | 279 |
1 files changed, 279 insertions, 0 deletions
diff --git a/intern/python/modules/simpleparse/bootstrap.py b/intern/python/modules/simpleparse/bootstrap.py new file mode 100644 index 00000000000..65274bb03ee --- /dev/null +++ b/intern/python/modules/simpleparse/bootstrap.py @@ -0,0 +1,279 @@ + +from TextTools.TextTools import * + +##################################################### +# FOLLOWING IS THE BOOTSTRAP PARSER, HAND-CODED! + +parsernamelist = [ +'declarationset', # 0 +'declaration', # 1 +'implicit_group', # 2 --> no longer used +'added_token', # 3 +'seq_added_token', #4 +'fo_added_token', #5 +'or_added_token', #6 +'and_added_token', #7 +'element_token', #8 +'group', #9 +'negpos_indicator', #10 +'occurence_indicator', #11 +'unreportedname', #12 +'name', #13 +'<ts>', # 14 +'literal', #15 +'range', # 16 +'CHARBRACE', #17 +'CHARDASH', # 18 +'CHARRANGE', # 19 +'CHARNOBRACE', # 20 +'ESCAPEDCHAR', # 21 +'SPECIALESCAPEDCHAR', # 22 +'OCTALESCAPEDCHAR' # 23 +] + +parsertuplelist = range( 24 ) + + + +parsertuplelist[0] = ( # declarationset + ('declaration', TableInList,(parsertuplelist, 1)), # must be at least one declaration + ('declaration', TableInList,(parsertuplelist, 1),1,0) +) +parsertuplelist[1] = ( # declaration + (None, TableInList,(parsertuplelist, 14)), # ts + (None, SubTable, ( + ('unreportedname', TableInList,(parsertuplelist, 12),1,2), + ('name', TableInList,(parsertuplelist, 13)), # name + ) + ), + (None, TableInList,(parsertuplelist, 14)), # ts + (None, Word, ':='), + (None, TableInList,(parsertuplelist, 14)), # ts + ('element_token', TableInList,(parsertuplelist, 8)), + (None, SubTable, ( # added_token + ('seq_added_token', TableInList, (parsertuplelist,4), 1, 5 ), + ('fo_added_token', TableInList, (parsertuplelist,5), 1, 4 ), + ('or_added_token', TableInList, (parsertuplelist,6), 1, 3 ), + ('and_added_token', TableInList, (parsertuplelist,7), 1, 2 ), + (None, Fail, Here), + ('seq_added_token', TableInList, (parsertuplelist,4), 1, 0 ), + ('fo_added_token', TableInList, (parsertuplelist,5), 1, -1 ), + ('or_added_token', TableInList, (parsertuplelist,6), 1, -2 ), + ('and_added_token', TableInList, (parsertuplelist,7), 1, -3 ), + ),1,1), + (None, TableInList,(parsertuplelist, 14)), # ts +) +parsertuplelist[3] = ( # added_token + ('seq_added_token', TableInList, (parsertuplelist,4), 1, 5 ), + ('fo_added_token', TableInList, (parsertuplelist,5), 1, 4 ), + ('or_added_token', TableInList, (parsertuplelist,6), 1, 3 ), + ('and_added_token', TableInList, (parsertuplelist,7), 1, 2 ), + (None, Fail, Here), + ('seq_added_token', TableInList, (parsertuplelist,4), 1, 0 ), + ('fo_added_token', TableInList, (parsertuplelist,5), 1, -1 ), + ('or_added_token', TableInList, (parsertuplelist,6), 1, -2 ), + ('and_added_token', TableInList, (parsertuplelist,7), 1, -3 ), +) +parsertuplelist[4] = ( # seq_added_token + (None, TableInList,(parsertuplelist, 14)), # ts + (None, Is, ','), + (None, TableInList,(parsertuplelist, 14)), # ts + ('element_token', TableInList,(parsertuplelist, 8)), + (None, TableInList,(parsertuplelist, 14),4,1), # ts + (None, Is, ',',3,1), + (None, TableInList,(parsertuplelist, 14),2,1), # ts + ('element_token', TableInList,(parsertuplelist, 8),1,-3), +) +parsertuplelist[5] = ( # fo_added_token + (None, TableInList,(parsertuplelist, 14)), # ts + (None, Is, '/'), + (None, TableInList,(parsertuplelist, 14)), # ts + ('element_token', TableInList,(parsertuplelist, 8)), + (None, TableInList,(parsertuplelist, 14),4,1), # ts + (None, Is, '/',3,1), + (None, TableInList,(parsertuplelist, 14),2,1), # ts + ('element_token', TableInList,(parsertuplelist, 8),1,-3), +) +parsertuplelist[6] = ( # or_added_token + (None, TableInList,(parsertuplelist, 14)), # ts + (None, Is, '|'), + (None, TableInList,(parsertuplelist, 14)), # ts + ('element_token', TableInList,(parsertuplelist, 8)), + (None, TableInList,(parsertuplelist, 14),4,1), # ts + (None, Is, '|',3,1), + (None, TableInList,(parsertuplelist, 14),2,1), # ts + ('element_token', TableInList,(parsertuplelist, 8),1,-3), +) +parsertuplelist[7] = ( # and_added_token + (None, TableInList,(parsertuplelist, 14)), # ts + (None, Is, '&'), + (None, TableInList,(parsertuplelist, 14)), # ts + ('element_token', TableInList,(parsertuplelist, 8)), + (None, TableInList,(parsertuplelist, 14),4,1), # ts + (None, Is, '&',3,1), + (None, TableInList,(parsertuplelist, 14),2,1), # ts + ('element_token', TableInList,(parsertuplelist, 8),1,-3), +) +parsertuplelist[8] = ( # element_token + ('negpos_indicator', TableInList,(parsertuplelist, 10),1,1), + (None, TableInList,(parsertuplelist, 14),1,1), # ts, very inefficient :( + ('literal', TableInList, (parsertuplelist,15),1, 4 ), + ('range', TableInList, (parsertuplelist,16),1, 3 ), + ('group', TableInList, (parsertuplelist,9),1, 2 ), + ('name', TableInList, (parsertuplelist,13) ), + (None, TableInList,(parsertuplelist, 14),1,1), # ts, very inefficient :( + ('occurence_indicator', TableInList,(parsertuplelist, 11), 1,1), +) +parsertuplelist[9] = ( # group + (None, Is, '('), + (None, TableInList,(parsertuplelist, 14),1,1), # ts + ('element_token', TableInList, (parsertuplelist,8) ), + (None, SubTable, ( # added_token + ('seq_added_token', TableInList, (parsertuplelist,4), 1, 5 ), + ('fo_added_token', TableInList, (parsertuplelist,5), 1, 4 ), + ('or_added_token', TableInList, (parsertuplelist,6), 1, 3 ), + ('and_added_token', TableInList, (parsertuplelist,7), 1, 2 ), + (None, Fail, Here), + ('seq_added_token', TableInList, (parsertuplelist,4), 1, 0 ), + ('fo_added_token', TableInList, (parsertuplelist,5), 1, -1 ), + ('or_added_token', TableInList, (parsertuplelist,6), 1, -2 ), + ('and_added_token', TableInList, (parsertuplelist,7), 1, -3 ), + ),1,1), + (None, TableInList,(parsertuplelist, 14),1,1), # ts + (None, Is, ')'), +) +parsertuplelist[10] = ( # negpos_indicator + (None, Is, "+",1,2), + (None, Is, "-"), +) +parsertuplelist[11] = ( #occurence_indicator + (None, Is, "+",1,3), + (None, Is, "*",1,2), + (None, Is, '?'), +) +parsertuplelist[12] = ( #unreportedname + (None, Is, '<'), + ('name', TableInList, (parsertuplelist, 13)), # inefficiency in final system :( + (None, Is, '>'), +) +parsertuplelist[13] = ( # name + (None, IsIn, alpha+'_'), + (None, AllIn, alphanumeric+'_',1,1) +) + +parsertuplelist[14] = ( # ts (whitespace) + (None, AllIn, ' \011\012\013\014\015',1,1), + (None, SubTable, ( + (None, Is, '#' ), + (None, AllNotIn, '\n',1,1 ) # problem if there's a comment at the end of the file :( + ) + ,1,-1 ), + ) +# this isn't actually used in the bootstrap parser... +_specialescapedchar = parsertuplelist[22] = ( # SPECIALESCAPEDCHAR + ('SPECIALESCAPEDCHAR', IsIn, '\\abfnrtv'), +) +_octalescapechar = parsertuplelist[23] = ( # OCTALESCAPEDCHAR + (None, IsIn, '01234567'), + (None, IsIn, '01234567',2), + (None, IsIn, '01234567',1), +) +_escapedchar = parsertuplelist[21] = ( # escapedcharacter + (None, Is, '\\' ), + ('SPECIALESCAPEDCHAR', IsIn, '\\abfnrtv',1,4), + ('OCTALESCAPEDCHAR', SubTable, _octalescapechar) +) + +_charnobrace = parsertuplelist[20] = ( # charnobrace + ('ESCAPEDCHAR', Table, _escapedchar, 1,2), + ('CHAR', IsNot, ']'), +) +_rangedef = parsertuplelist[19] = ( # charrange + ('CHARNOBRACE', Table, _charnobrace ), + (None, Is, '-'), + ('CHARNOBRACE', Table, _charnobrace ), +) + + +parsertuplelist[16] = ( #range + (None, Is, '['), + ('CHARBRACE', Is, ']',1,1), + ('CHARDASH', Is, '-',1,1), + ('CHARRANGE', Table, _rangedef, 1,0), + (None, SubTable, _charnobrace, 1,-1), + (None, Is, ']') +) + +_sqstr = ( + (None, Is, "'" ), +# (None, Is, "'",1, 5 ), # immediate close + (None, AllNotIn, "\\'",1,1 ), # all not an escape or end + (None, Is, "\\", 2, 1), # is an escaped char + (None, Skip, 1, 1, -2), # consume the escaped char and loop back + (None, Is, "'" ) # in case there was no matching ', which would also cause a fail for allnotin + ) +_dblstr = ( + (None, Is, '"' ), +# (None, Is, '"',1, 5 ), # immediate close + (None, AllNotIn, '\\"' ,1,1), # not an escaped or end + (None, Is, "\\", 2, 1), # is an escaped char + (None, Skip, 1, 1, -2), # consume the escaped char and loop back + (None, Is, '"' ) # in case there was no matching ", which would also cause a fail for allnotin + ) + + + +# literal := ("'",(CHARNOSNGLQUOTE/ESCAPEDCHAR)*,"'") / ('"',(CHARNODBLQUOTE/ESCAPEDCHAR)*,'"') + +parsertuplelist[15] = ( # literal + (None, Is, "'", 4, 1 ), + ('CHARNOSNGLQUOTE', AllNotIn, "\\'",1,1 ), # all not an escape or end + ('ESCAPEDCHAR', Table, _escapedchar, 1, -1), + (None, Is, "'", 1,5 ), + (None, Is, '"' ), + ('CHARNODBLQUOTE', AllNotIn, '\\"',1,1 ), # all not an escape or end + ('ESCAPEDCHAR', Table, _escapedchar, 1, -1), + (None, Is, '"'), +) + +declaration = r'''declarationset := declaration+ +declaration := ts , (unreportedname/name) ,ts,':=',ts, element_token, ( seq_added_token / fo_added_token / or_added_token / and_added_token )*, ts +seq_added_token := (ts,',',ts, element_token)+ +fo_added_token := (ts,'/',ts, element_token)+ +or_added_token := (ts,'|',ts, element_token)+ # not currently supported +and_added_token := (ts,'&',ts, element_token)+ # not currently supported +element_token := negpos_indicator?, ts, (literal/range/group/name),ts, occurence_indicator? +group := '(',ts, element_token, ( seq_added_token / fo_added_token / or_added_token / and_added_token )*, ts, ')' + +negpos_indicator := '+'/'-' +occurence_indicator := '+'/'*'/'?' +unreportedname := '<', name, '>' +name := [a-zA-Z_],[a-zA-Z0-9_]* +<ts> := ( [ \011-\015]+ / ('#',-'\n'+,'\n')+ )* +literal := ("'",(CHARNOSNGLQUOTE/ESCAPEDCHAR)*,"'") / ('"',(CHARNODBLQUOTE/ESCAPEDCHAR)*,'"') + + +range := '[',CHARBRACE?,CHARDASH?, (CHARRANGE/CHARNOBRACE)*, CHARDASH?,']' +CHARBRACE := ']' +CHARDASH := '-' +CHARRANGE := CHARNOBRACE, '-', CHARNOBRACE +CHARNOBRACE := ESCAPEDCHAR/CHAR +CHAR := -[]] +ESCAPEDCHAR := '\\',( SPECIALESCAPEDCHAR / OCTALESCAPEDCHAR ) +SPECIALESCAPEDCHAR := [\\abfnrtv] +OCTALESCAPEDCHAR := [0-7],[0-7]?,[0-7]? +CHARNODBLQUOTE := -[\\"]+ +CHARNOSNGLQUOTE := -[\\']+ +''' + +def parse( instr = declaration, parserelement = 'declarationset' ): + tbl = ( + (parserelement, Table, parsertuplelist[parsernamelist.index( parserelement )] ), + ) + return tag( instr, tbl) + +if __name__ == '__main__': + import sys, pprint + pprint.pprint( apply( parse, tuple( sys.argv[1:] ) ) ) + + |