diff options
Diffstat (limited to 'intern/python/modules/simpleparse')
-rw-r--r-- | intern/python/modules/simpleparse/__init__.py | 5 | ||||
-rw-r--r-- | intern/python/modules/simpleparse/bootstrap.py | 279 | ||||
-rw-r--r-- | intern/python/modules/simpleparse/generator.py | 432 |
3 files changed, 716 insertions, 0 deletions
diff --git a/intern/python/modules/simpleparse/__init__.py b/intern/python/modules/simpleparse/__init__.py new file mode 100644 index 00000000000..961871c36a5 --- /dev/null +++ b/intern/python/modules/simpleparse/__init__.py @@ -0,0 +1,5 @@ +''' +Simple parsing using mxTextTools + +tar -cvf simpleparse.tar --exclude-from=exclude.txt +'''
# \ No newline at end of file   (trailer of the preceding simpleparse/__init__.py hunk)
# diff --git a/intern/python/modules/simpleparse/bootstrap.py b/intern/python/modules/simpleparse/bootstrap.py
# new file mode 100644
# index 00000000000..65274bb03ee
# --- /dev/null
# +++ b/intern/python/modules/simpleparse/bootstrap.py
# @@ -0,0 +1,279 @@   (hunk header of the original patch)

from TextTools.TextTools import *

#####################################################
# FOLLOWING IS THE BOOTSTRAP PARSER, HAND-CODED!
#
# NOTE(review): every table entry below looks like an mxTextTools tag-table
# tuple, (tagobj, command, argument[, jump-on-failure[, jump-on-success]]),
# with the jumps being relative entry offsets -- confirm against the
# mxTextTools documentation before editing any offset.

# production name -> index into parsertuplelist
parsernamelist = [
    'declarationset',       # 0
    'declaration',          # 1
    'implicit_group',       # 2 --> no longer used
    'added_token',          # 3
    'seq_added_token',      # 4
    'fo_added_token',       # 5
    'or_added_token',       # 6
    'and_added_token',      # 7
    'element_token',        # 8
    'group',                # 9
    'negpos_indicator',     # 10
    'occurence_indicator',  # 11
    'unreportedname',       # 12
    'name',                 # 13
    '<ts>',                 # 14
    'literal',              # 15
    'range',                # 16
    'CHARBRACE',            # 17
    'CHARDASH',             # 18
    'CHARRANGE',            # 19
    'CHARNOBRACE',          # 20
    'ESCAPEDCHAR',          # 21
    'SPECIALESCAPEDCHAR',   # 22
    'OCTALESCAPEDCHAR'      # 23
]

# One slot per production, filled in below.  The tables refer to each other
# through (parsertuplelist, index) pairs, so this must stay the same mutable
# list object; list() makes that explicit.  Slots 2, 17 and 18 are never
# assigned and keep their placeholder integer.
parsertuplelist = list( range( 24 ) )


def _added_token_table( separator ):
    '''
    Build the table for "(ts, separator, ts, element_token)+".

    The seq/fo/or/and added-token productions are identical except for
    the single separator character, so they share this builder.
    '''
    return (
        (None, TableInList, (parsertuplelist, 14)),  # ts
        (None, Is, separator),
        (None, TableInList, (parsertuplelist, 14)),  # ts
        ('element_token', TableInList, (parsertuplelist, 8)),
        (None, TableInList, (parsertuplelist, 14), 4, 1),  # ts
        (None, Is, separator, 3, 1),
        (None, TableInList, (parsertuplelist, 14), 2, 1),  # ts
        ('element_token', TableInList, (parsertuplelist, 8), 1, -3),
    )


# "( seq_added_token / fo_added_token / or_added_token / and_added_token )":
# the first four entries pick the first alternative, the explicit Fail stops
# the table when none matched, the last four loop over further matches.
# Shared by 'declaration', 'added_token' and 'group' (it was pasted three
# times in the original).
_added_token = (
    ('seq_added_token', TableInList, (parsertuplelist, 4), 1, 5),
    ('fo_added_token', TableInList, (parsertuplelist, 5), 1, 4),
    ('or_added_token', TableInList, (parsertuplelist, 6), 1, 3),
    ('and_added_token', TableInList, (parsertuplelist, 7), 1, 2),
    (None, Fail, Here),
    ('seq_added_token', TableInList, (parsertuplelist, 4), 1, 0),
    ('fo_added_token', TableInList, (parsertuplelist, 5), 1, -1),
    ('or_added_token', TableInList, (parsertuplelist, 6), 1, -2),
    ('and_added_token', TableInList, (parsertuplelist, 7), 1, -3),
)

parsertuplelist[0] = (  # declarationset
    ('declaration', TableInList, (parsertuplelist, 1)),  # must be at least one declaration
    ('declaration', TableInList, (parsertuplelist, 1), 1, 0)
)
parsertuplelist[1] = (  # declaration
    (None, TableInList, (parsertuplelist, 14)),  # ts
    (None, SubTable, (
        ('unreportedname', TableInList, (parsertuplelist, 12), 1, 2),
        ('name', TableInList, (parsertuplelist, 13)),  # name
    )),
    (None, TableInList, (parsertuplelist, 14)),  # ts
    (None, Word, ':='),
    (None, TableInList, (parsertuplelist, 14)),  # ts
    ('element_token', TableInList, (parsertuplelist, 8)),
    (None, SubTable, _added_token, 1, 1),  # added_token
    (None, TableInList, (parsertuplelist, 14)),  # ts
)
parsertuplelist[3] = _added_token  # added_token
parsertuplelist[4] = _added_token_table( ',' )  # seq_added_token
parsertuplelist[5] = _added_token_table( '/' )  # fo_added_token
parsertuplelist[6] = _added_token_table( '|' )  # or_added_token
parsertuplelist[7] = _added_token_table( '&' )  # and_added_token
parsertuplelist[8] = (  # element_token
    ('negpos_indicator', TableInList, (parsertuplelist, 10), 1, 1),
    (None, TableInList, (parsertuplelist, 14), 1, 1),  # ts, very inefficient :(
    ('literal', TableInList, (parsertuplelist, 15), 1, 4),
    ('range', TableInList, (parsertuplelist, 16), 1, 3),
    ('group', TableInList, (parsertuplelist, 9), 1, 2),
    ('name', TableInList, (parsertuplelist, 13)),
    (None, TableInList, (parsertuplelist, 14), 1, 1),  # ts, very inefficient :(
    ('occurence_indicator', TableInList, (parsertuplelist, 11), 1, 1),
)
parsertuplelist[9] = (  # group
    (None, Is, '('),
    (None, TableInList, (parsertuplelist, 14), 1, 1),  # ts
    ('element_token', TableInList, (parsertuplelist, 8)),
    (None, SubTable, _added_token, 1, 1),  # added_token
    (None, TableInList, (parsertuplelist, 14), 1, 1),  # ts
    (None, Is, ')'),
)
parsertuplelist[10] = (  # negpos_indicator
    (None, Is, "+", 1, 2),
    (None, Is, "-"),
)
parsertuplelist[11] = (  # occurence_indicator
    (None, Is, "+", 1, 3),
    (None, Is, "*", 1, 2),
    (None, Is, '?'),
)
parsertuplelist[12] = (  # unreportedname
    (None, Is, '<'),
    ('name', TableInList, (parsertuplelist, 13)),  # inefficiency in final system :(
    (None, Is, '>'),
)
parsertuplelist[13] = (  # name
    (None, IsIn, alpha+'_'),
    (None, AllIn, alphanumeric+'_', 1, 1)
)

parsertuplelist[14] = (  # ts (whitespace)
    (None, AllIn, ' \011\012\013\014\015', 1, 1),
    (None, SubTable, (
        (None, Is, '#'),
        (None, AllNotIn, '\n', 1, 1)  # problem if there's a comment at the end of the file :(
    ), 1, -1),
)
# this isn't actually used in the bootstrap parser...
_specialescapedchar = parsertuplelist[22] = (  # SPECIALESCAPEDCHAR
    ('SPECIALESCAPEDCHAR', IsIn, '\\abfnrtv'),
)
_octalescapechar = parsertuplelist[23] = (  # OCTALESCAPEDCHAR
    (None, IsIn, '01234567'),
    (None, IsIn, '01234567', 2),
    (None, IsIn, '01234567', 1),
)
_escapedchar = parsertuplelist[21] = (  # escapedcharacter
    (None, Is, '\\'),
    ('SPECIALESCAPEDCHAR', IsIn, '\\abfnrtv', 1, 4),
    ('OCTALESCAPEDCHAR', SubTable, _octalescapechar)
)

_charnobrace = parsertuplelist[20] = (  # charnobrace
    ('ESCAPEDCHAR', Table, _escapedchar, 1, 2),
    ('CHAR', IsNot, ']'),
)
_rangedef = parsertuplelist[19] = (  # charrange
    ('CHARNOBRACE', Table, _charnobrace),
    (None, Is, '-'),
    ('CHARNOBRACE', Table, _charnobrace),
)


parsertuplelist[16] = (  # range
    (None, Is, '['),
    ('CHARBRACE', Is, ']', 1, 1),
    ('CHARDASH', Is, '-', 1, 1),
    ('CHARRANGE', Table, _rangedef, 1, 0),
    (None, SubTable, _charnobrace, 1, -1),
    (None, Is, ']')
)

# Stand-alone quoted-string tables; nothing else in this module refers to them.
_sqstr = (
    (None, Is, "'"),
    (None, AllNotIn, "\\'", 1, 1),  # all not an escape or end
    (None, Is, "\\", 2, 1),  # is an escaped char
    (None, Skip, 1, 1, -2),  # consume the escaped char and loop back
    (None, Is, "'")  # in case there was no matching ', which would also cause a fail for allnotin
)
_dblstr = (
    (None, Is, '"'),
    (None, AllNotIn, '\\"', 1, 1),  # not an escaped or end
    (None, Is, "\\", 2, 1),  # is an escaped char
    (None, Skip, 1, 1, -2),  # consume the escaped char and loop back
    (None, Is, '"')  # in case there was no matching ", which would also cause a fail for allnotin
)


# literal := ("'",(CHARNOSNGLQUOTE/ESCAPEDCHAR)*,"'") / ('"',(CHARNODBLQUOTE/ESCAPEDCHAR)*,'"')

parsertuplelist[15] = (  # literal
    (None, Is, "'", 4, 1),
    ('CHARNOSNGLQUOTE', AllNotIn, "\\'", 1, 1),  # all not an escape or end
    ('ESCAPEDCHAR', Table, _escapedchar, 1, -1),
    (None, Is, "'", 1, 5),
    (None, Is, '"'),
    ('CHARNODBLQUOTE', AllNotIn, '\\"', 1, 1),  # all not an escape or end
    ('ESCAPEDCHAR', Table, _escapedchar, 1, -1),
    (None, Is, '"'),
)

# The grammar of the grammar language, written in itself; this is the default
# input of parse() below and of generator._BaseGenerator.
declaration = r'''declarationset := declaration+
declaration := ts , (unreportedname/name) ,ts,':=',ts, element_token, ( seq_added_token / fo_added_token / or_added_token / and_added_token )*, ts
seq_added_token := (ts,',',ts, element_token)+
fo_added_token := (ts,'/',ts, element_token)+
or_added_token := (ts,'|',ts, element_token)+ # not currently supported
and_added_token := (ts,'&',ts, element_token)+ # not currently supported
element_token := negpos_indicator?, ts, (literal/range/group/name),ts, occurence_indicator?
group := '(',ts, element_token, ( seq_added_token / fo_added_token / or_added_token / and_added_token )*, ts, ')'

negpos_indicator := '+'/'-'
occurence_indicator := '+'/'*'/'?'
unreportedname := '<', name, '>'
name := [a-zA-Z_],[a-zA-Z0-9_]*
<ts> := ( [ \011-\015]+ / ('#',-'\n'+,'\n')+ )*
literal := ("'",(CHARNOSNGLQUOTE/ESCAPEDCHAR)*,"'") / ('"',(CHARNODBLQUOTE/ESCAPEDCHAR)*,'"')


range := '[',CHARBRACE?,CHARDASH?, (CHARRANGE/CHARNOBRACE)*, CHARDASH?,']'
CHARBRACE := ']'
CHARDASH := '-'
CHARRANGE := CHARNOBRACE, '-', CHARNOBRACE
CHARNOBRACE := ESCAPEDCHAR/CHAR
CHAR := -[]]
ESCAPEDCHAR := '\\',( SPECIALESCAPEDCHAR / OCTALESCAPEDCHAR )
SPECIALESCAPEDCHAR := [\\abfnrtv]
OCTALESCAPEDCHAR := [0-7],[0-7]?,[0-7]?
CHARNODBLQUOTE := -[\\"]+
CHARNOSNGLQUOTE := -[\\']+
'''


def parse( instr = declaration, parserelement = 'declarationset' ):
    '''
    Tag instr with the hand-coded table for production parserelement.

    parserelement must be one of the names in parsernamelist (a ValueError
    from list.index is raised otherwise).  Returns whatever TextTools' tag()
    returns for a one-entry table wrapping that production.
    '''
    tbl = (
        (parserelement, Table, parsertuplelist[parsernamelist.index( parserelement )]),
    )
    return tag( instr, tbl )


if __name__ == '__main__':
    import sys, pprint
    # command-line arguments are passed straight through as (instr, parserelement)
    pprint.pprint( parse( *sys.argv[1:] ) )


# diff --git a/intern/python/modules/simpleparse/generator.py b/intern/python/modules/simpleparse/generator.py
# new file mode 100644
# index 00000000000..67f83106dfe
# --- /dev/null
# +++ b/intern/python/modules/simpleparse/generator.py
# @@ -0,0 +1,432 @@   (hunk header of the original patch)

from TextTools.TextTools import *
import bootstrap # the hand-coded parser
import operator, strop as string


def err( value ):
    '''Report a problem found while building a parser (prints value).'''
    print( value )


class _BaseGenerator:
    '''
    Class providing the functions required to turn a
    parse tree as generated by the bootstrap parser into
    a new set of parser tuples. I.e a parser generator :)
    Effectively this is the bootstrap generator.
    '''
    def __init__( self, syntaxstring = bootstrap.declaration, parserelement = 'declarationset' ):
        '''
        Turn syntaxstring into a parsetree using
        the bootstrap module's parse command
        '''
        # should do some error checking in here :)
        self.syntaxstring = syntaxstring
        # [1] is the tag list of the parse result, [0] its first (top-level) entry
        self.parsetree = bootstrap.parse( syntaxstring, parserelement )[1][0]
        self.nameset = []   # production names, parallel to tupleset
        self.tupleset = []  # compiled tables; referenced by index from the tables themselves

    def stringval( self, tuple ):
        '''
        Return the string value for a parse-result tuple
        (the slice of the syntax string between its start and end offsets)
        '''
        return self.syntaxstring[ tuple[1]:tuple[2] ]

    def build( self, prebuiltnodes=() ):
        '''
        Build a new parsing table from the syntax string.
        New parsers may be accessed using the parserbyname method.

        The pre-built nodes are parsing tables for inclusion in the grammar
        Added version 1.0.1 to provide greater extensibility.

        prebuiltnodes is a sequence of (name, table) pairs, e.g. the result
        of another generator's allparsers().
        '''
        for key, value in prebuiltnodes:
            self.nameset.append( key )
            self.tupleset.append( value )
        # Declared productions are stored after the prebuilt ones.  The
        # original compile loop indexed the declarations with the nameset
        # index, so any prebuilt node shifted everything: prebuilt tables were
        # overwritten and the loop ran off the end of the declaration list.
        offset = len( self.nameset )
        declarations = self.parsetree[3]
        # first register all declared names to reserve their indices, so
        # forward references between productions resolve
        for decl in declarations:
            self.nameset.append( self.stringval( decl[3][0] ) )
            self.tupleset.append( None )
        for i in range( len( declarations ) ):
            # everything after the name is compiled as an anonymous group
            dataset = self.group( ('group', 1, 2, declarations[i][3][1:]), self )
            if dataset:
                self.tupleset[offset + i] = tuple( dataset )

    def parserbyname( self, name ):
        '''
        Retrieve a single parsing tuple by its production name
        (reports the problem and returns () for an unknown name)
        '''
        try:
            return self.tupleset[ self.nameset.index( name ) ]
        except ValueError:
            err( 'Could not find parser tuple of name %s' % (name,) )
            return ()

    def allparsers( self ):
        '''
        Return a list of (productionname, parsingtuple) values
        suitable for passing to another generator as its pre-calculated
        set of parsing tuples. (See method build)
        '''
        returnvalue = []
        for i in range( len( self.nameset ) ):
            returnvalue.append( (self.nameset[i], self.tupleset[i]) )
        return returnvalue

    ### Actual processing functions...
    def element_token( self, eltup, genobj, reportname=None ):
        '''
        Compile one element_token node.

        Reads the optional negpos/occurence indicators among the children of
        eltup, then dispatches to the method named after the remaining child
        (literal, range, group or name).  Returns that handler's result, a
        list of table entries.  Raises AttributeError when no handler exists
        and TypeError when the node has no element child.
        '''
        negative = optional = repeating = element = None
        for data in eltup[3]:
            if data[0] == 'negpos_indicator':
                if genobj.stringval( data ) == '-':
                    negative = 1
            elif data[0] == 'occurence_indicator':
                indicator = genobj.stringval( data )
                if indicator == '*':
                    optional = 1
                    repeating = 1
                elif indicator == '+':
                    repeating = 1
                elif indicator == '?':
                    optional = 1
                else:
                    err( 'Unknown occurence indicator '+ indicator )
            else:
                element = data
        if element is None:
            # previously surfaced as an opaque "None is unsubscriptable" TypeError
            err( '''element_token has no literal/range/group/name child, parser build aborted''' )
            raise TypeError( 'element_token without an element' )
        # Guard only the lookup: an AttributeError raised inside a handler is
        # a different problem and must not be reported as a missing handler.
        try:
            handler = getattr( self, element[0] )
        except AttributeError:
            err( '''Didn't find handler for element type %s, parser build aborted'''%element[0] )
            raise
        return handler( element, genobj, negative, repeating, optional )

    def group( self, els, genobj, negative= None, repeating=None, optional = None, reportname=None ):
        '''
        Determine what type of group we're dealing with and determine what
        function to call, then call it.

        Returns the table entries for the group, [] for an empty group, and
        None (after reporting) for the unsupported or/and group types.
        '''
        groupset = els[3]
        # groupset is an element_token followed by a possible added_token
        if not groupset:
            return []
        els = [ groupset[0] ]
        if len( groupset ) > 1:
            els[len( els ):] = groupset[1][3]
            gtype = groupset[1][0]
            if gtype == 'seq_added_token':
                return self.seq( els, genobj, negative, repeating, optional, reportname )
            elif gtype == 'fo_added_token':
                return self.fo( els, genobj, negative, repeating, optional, reportname )
            else:
                err( '''An as-yet undefined group type was used! %s'''%gtype )
                return None
        # default "sequence" of one... could do more work and make it process
        # the results specifically, but that's optimisation ;)
        return self.seq( els, genobj, negative, repeating, optional, None )

    def seq( self, els, genobj, negative= None, repeating=None, optional = None, reportname=None ):
        '''
        Compile a sequence group (elements separated by ",").

        Returns a list of table entries wrapping the concatenated tables of
        all elements according to the negative/repeating/optional flags.
        '''
        elset = []
        for el in els:
            elset.extend( self.element_token( el, genobj ) )
        body = tuple( elset )
        if negative:
            # "the sequence does not match here": if it matches, fail; if not, skip one character
            notmatch = ( (None, SubTable, body, 2,1), (None, Fail, Here), (None, Skip, 1) )
            if repeating:
                if optional:
                    return [ (None, SubTable, ( (None, SubTable, notmatch, 2,1), (None, EOF, Here, -1,1), ), ), ]
                else: # not optional
                    return [ (None, SubTable, ( (None, SubTable, notmatch), (None, SubTable, notmatch, 2,1), (None, EOF, Here, -1,1), ), ), ]
            else: # single
                if optional:
                    return [ (None, SubTable, notmatch, 1,1) ]
                else: # not optional
                    return [ (None, SubTable, notmatch) ]
        else: # positive
            if repeating:
                if optional:
                    return [ (None, SubTable, body, 1,0) ]
                else: # not optional
                    return [ (None, SubTable, body), (None, SubTable, body, 1,0) ]
            else: # single
                if optional:
                    return [ (None, SubTable, body, 1,1) ]
                else: # not optional
                    return [ (None, SubTable, body) ]

    def fo( self, els, genobj, negative= None, repeating=None, optional = None, reportname=None ):
        '''
        Compile a first-of group (alternatives separated by "/").

        Each alternative becomes exactly one table entry so that the jump
        offsets between alternatives can be computed from its position.
        Returns a list of table entries.
        '''
        # (The original also compiled every element once up front and threw
        # the result away; each alternative is now compiled exactly once.)
        elset = []
        for el in els:
            dataset = self.element_token( el, genobj )
            if len( dataset ) == 1 and len( dataset[0] ) == 3: # we can alter the jump states with impunity
                elset.append( dataset[0] )
            else: # for now I'm eating the inefficiency and doing an extra SubTable for all elements to allow for easy calculation of jumps within the FO group
                elset.append( (None, SubTable, tuple( dataset )) )
        count = len( elset )
        if negative:
            # all negative FO's have the meaning "a positive, single, non-optional FO not matching"
            # the flags modify how failure and continuation are handled in that case, so they can use
            # the same procset.
            # Note: Negative FO groups are _very_ heavy, they have normally about 4 subtable calls
            procset = []
            for i in range( count-1 ): # note that we have to treat last el specially
                procset.append( elset[i] + (1, count-i) ) # on a match, jump to the Fail entry
            procset.append( elset[-1] + (2,1) ) # match -> Fail entry, no match -> Skip entry
            procset.append( (None, Fail, Here) )
            procset.append( (None, Skip, 1) )
            # if the following looks familiar you probably looked at seq above
            if repeating:
                if optional:
                    return [ (None, SubTable, ( (None, SubTable, tuple( procset ), 2,1), (None, EOF, Here,-1,1) ) ) ]
                else: # not optional
                    return [ (None, SubTable, ( (None, SubTable, tuple( procset )), (None, SubTable, tuple( procset ), 2,1), (None, EOF, Here,-1,1) ) ) ]
            else: # single
                if optional:
                    return [ (None, SubTable, tuple( procset ), 1,1) ]
                else: # not optional
                    return [ (None, SubTable, tuple( procset )) ]
        else: # positive
            procset = []
            if repeating:
                if optional:
                    for i in range( count ):
                        procset.append( elset[i] + (1,-i) ) # if success, go back to start which is -i elements back
                else: # not optional
                    # first pass: one mandatory match, then fall into the looping copy
                    for i in range( count-1 ):
                        procset.append( elset[i] + (1, count-i+1) ) # if success, jump to later section
                    procset.append( elset[-1] + (1, 2) ) # no match at all -> the explicit Fail below
                    procset.append( (None, Fail, Here) )
                    # second pass: optional repeats
                    for i in range( count-1 ):
                        procset.append( elset[i] + (1, -i) ) # if success, go back to start which is -i elements back
                    procset.append( elset[-1] + (1, 1-count) ) # success -> back to the start of the repeat section
            else: # single
                if optional:
                    for i in range( count ):
                        procset.append( elset[i] + (1, count-i) ) # if success, jump past end
                else: # not optional
                    for i in range( count-1 ): # note that we have to treat last el specially
                        procset.append( elset[i] + (1, count-i) ) # if success, jump past end
                    procset.append( elset[-1] ) # will cause a failure if last element doesn't match
            return procset

    def name( self, value, genobj, negative = None, repeating = None, optional = None, reportname=None ):
        '''
        Compile a reference to another production.

        The production is looked up in genobj.nameset, first as written and
        then as an unreported "<name>" (whose matches are not tagged).  An
        undeclared name is reported and registered with an empty slot.
        Returns a list of table entries.
        '''
        svalue = genobj.stringval( value )
        try:
            sindex = genobj.nameset.index( svalue )
        except ValueError: # eeps, a value not declared
            try:
                sindex = genobj.nameset.index( '<'+svalue+'>' )
                svalue = None
            except ValueError:
                err( '''The name %s could not be found in the declarationset. The parser will not compile.'''%svalue )
                genobj.nameset.append( svalue )
                genobj.tupleset.append( None )
                sindex = len( genobj.nameset ) - 1
        target = (genobj.tupleset, sindex)
        if negative:
            if repeating:
                if optional:
                    return [ (svalue, SubTable, ( (None, TableInList, target, 1,3), (None, EOF, Here,1,2), (None,Skip,1,-2,-2) ) ) ]
                else: # not optional
                    return [ (svalue, SubTable, ( (None, TableInList, target,2,1), (None, Fail, Here), (None, Skip, 1), (None, TableInList, target, 1,3), (None, EOF, Here,1,2), (None,Skip,1,-2,-2) ) ) ]
            else: # single
                if optional:
                    return [ (None, SubTable, ( (None, TableInList, target,2,1), (None, Fail, Here), (svalue, Skip, 1) ),1,1) ]
                else: # not optional
                    return [ (None, SubTable, ( (None, TableInList, target,2,1), (None, Fail, Here), (svalue, Skip, 1) )) ]
        else: # positive
            if repeating:
                if optional:
                    return [ (svalue, TableInList, target, 1,0) ]
                else: # not optional
                    return [ (svalue, TableInList, target), (svalue, TableInList, target,1,0) ]
            else: # single
                if optional:
                    return [ (svalue, TableInList, target, 1,1) ]
                else: # not optional
                    return [ (svalue, TableInList, target) ]

    # character following the backslash -> character it stands for
    specialescapedmap = {
        'a':'\a',
        'b':'\b',
        'f':'\f',
        'n':'\n',
        'r':'\r',
        't':'\t',
        'v':'\v',
        '\\':'\\',
        '"':'"',
        "'":"'",
    }

    def escapedchar( self, el, genobj ):
        '''
        Return the character an ESCAPEDCHAR node stands for
        ('' when its child is neither a special nor an octal escape)
        '''
        kind = el[3][0][0]
        if kind == 'SPECIALESCAPEDCHAR':
            return self.specialescapedmap[ genobj.stringval( el[3][0] ) ]
        elif kind == 'OCTALESCAPEDCHAR':
            # one to three octal digits
            return chr( int( genobj.stringval( el[3][0] ), 8 ) )
        return ''

    def literal( self, value, genobj, negative = None, repeating=None, optional=None, reportname=None ):
        '''
        Calculate the tag-table for a literal element token
        '''
        svalue = ''
        for el in value[3]:
            if el[0] in ('CHARNOSNGLQUOTE', 'CHARNODBLQUOTE'):
                svalue = svalue + genobj.stringval( el )
            elif el[0] == 'ESCAPEDCHAR':
                svalue = svalue + self.escapedchar( el, genobj )
        # Is test instead of Word test for short literals, should be faster I'd imagine
        if len( svalue ) > 1:
            command = Word
            backstep = -len( svalue )
        else:
            command = Is
            backstep = -1
        if negative:
            if repeating: # a repeating negative value, a "search" in effect
                if optional: # if fails, then go to end of file
                    return [ (None, sWordStart, BMS( svalue ),1,2), (None, Move, ToEOF ) ]
                else: # must first check to make sure the current position is not the word, then the same
                    return [ (None, Word, svalue, 2,1), (None, Fail, Here), (None, sWordStart, BMS( svalue ),1,2), (None, Move, ToEOF ) ]
            else: # a single-character test saying "not a this"
                if optional: # test for a success, move back if success, move one forward if failure
                    return [ (None, command, svalue, 2,1),
                        (None, Skip, backstep, 2,2), # backup if this was the word to start of word, succeed
                        (None, Skip, 1 ) ] # else just move one character and succeed
                else: # must fail if it finds the word, or move one forward
                    return [ (None, command, svalue, 2,1),
                        (None, Fail, Here),
                        (None, Skip, 1 ) ] # else just move one character and succeed
        else: # positive
            if repeating:
                if optional:
                    return [ (None, command, svalue, 1,0) ]
                else: # not optional
                    return [ (None, command, svalue), (None, command, svalue,1,0) ]
            else: # not repeating
                if optional:
                    return [ (None, command, svalue, 1,1) ]
                else: # not optional
                    # the original used Word here for every length; kept as is
                    return [ (None, Word, svalue) ]

    def charnobrace( self, cval, genobj ):
        '''Return the character for a CHARNOBRACE node, decoding an escape if present.'''
        if cval[3][0][0] == 'ESCAPEDCHAR':
            return self.escapedchar( cval[3][0], genobj )
        return genobj.stringval( cval )

    def range( self, value, genobj, negative = None, repeating=None, optional=None, reportname=None ):
        '''
        Calculate the tag-table for a character range ("[...]") element token.

        Collects every character named in the range into one string and
        returns a single (Is|All)(Not)In entry over that string.
        '''
        dataset = []
        for cval in value[3]:
            if cval[0] == 'CHARBRACE':
                dataset.append( ']' )
            elif cval[0] == 'CHARDASH':
                dataset.append( '-' )
            elif cval[0] == 'CHARNOBRACE':
                dataset.append( self.charnobrace( cval, genobj ) )
            elif cval[0] == 'ESCAPEDCHAR':
                # NOTE(review): the hand-coded bootstrap range table appears to
                # emit ESCAPEDCHAR/CHAR directly (its CHARNOBRACE sub-table is
                # untagged); decode the escape instead of adding its raw
                # source text to the set -- confirm against the tagging engine.
                dataset.append( self.escapedchar( cval, genobj ) )
            elif cval[0] == 'CHARRANGE':
                start = ord( self.charnobrace( cval[3][0], genobj ) )
                end = ord( self.charnobrace( cval[3][1], genobj ) )
                if start > end: # accept reversed ranges such as [z-a]
                    start, end = end, start
                dataset.append( ''.join( map( chr, range( start, end+1 ) ) ) )
            else:
                dataset.append( genobj.stringval( cval ) )
        svalue = ''.join( dataset )
        if negative:
            if repeating:
                command = AllNotIn
            else:
                command = IsNotIn
        else:
            if repeating:
                command = AllIn
            else:
                command = IsIn
        if optional:
            return [ (None, command, svalue, 1 ) ]
        else: # not optional
            return [ (None, command, svalue ) ]


class Generator( _BaseGenerator ):
    '''
    Generator whose syntax string is parsed with an already-built parsing
    table instead of the hand-coded bootstrap tables.
    '''
    def __init__( self, syntaxstring , parser ):
        '''
        Tag syntaxstring with the table parser and keep the resulting tag
        list as the children of a stand-in top-level node.
        '''
        self.syntaxstring = syntaxstring
        # only slot [3] (the child list) of the stand-in node is ever read
        self.parsetree = [0,1,2, tag( syntaxstring, parser )[1] ]
        self.nameset = []
        self.tupleset = []


def buildParser( declaration, prebuiltnodes=() ):
    '''
    End-developer function to create an application-specific parser
    the parsing tuple is available on the returned object as
    object.parserbyname( 'declaredname' ), where declaredname is the
    name you defined in your language definition file.

    The declaration argument is the text of a language definition file.
    prebuiltnodes is passed on to the new generator's build method.
    '''
    proc = _BaseGenerator( )
    proc.build()
    newgen = Generator( declaration, proc.parserbyname( 'declarationset' ) )
    newgen.build( prebuiltnodes=prebuiltnodes )
    return newgen