1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
|
from TextTools.TextTools import *
import bootstrap # the hand-coded parser
import operator, strop as string
def err( value ):
print value
class _BaseGenerator:
'''
Class providing the functions required to turn a
parse tree as generated by the bootstrap parser into
a new set of parser tuples. I.e a parser generator :)
Effectively this is the bootstrap generator.
'''
def __init__( self, syntaxstring = bootstrap.declaration, parserelement = 'declarationset' ):
'''
Turn syntaxstring into a parsetree using
the bootstrap module's parse command
'''
# should do some error checking in here :)
self.syntaxstring = syntaxstring
self.parsetree = bootstrap.parse( syntaxstring, parserelement )[1][0] # the child list
self.nameset = []
self.tupleset = []
def stringval( self, tuple ):
'''
Return the string value for a parse-result tuple
'''
return self.syntaxstring[ tuple[1]:tuple[2] ]
def build( self, prebuiltnodes=() ):
'''
Build a new parsing table from the syntax string.
New parsers may be accessed using the parserbyname method.
The pre-built nodes are parsing tables for inclusion in the grammar
Added version 1.0.1 to provide greater extensibility.
'''
# first register all declared names to reserve their indicies
#if self.__class__.__name__ == 'Generator':
# import pdb
# pdb.set_trace()
for key, value in prebuiltnodes:
self.nameset.append( key )
self.tupleset.append( value )
for decl in self.parsetree[3]:
#print decl
name = self.stringval( decl[3][0] )
self.nameset.append( name )
self.tupleset.append( None)
#print 'Declared names:',self.nameset
for i in range( len( self.nameset)):
#print '''Processing declaration %s '''% self.nameset[i]
dataset = self.group( ('group',1,2, self.parsetree[3][i][3][1:]), self )
if dataset:
self.tupleset[i] = tuple( dataset)
def parserbyname( self, name ):
'''
Retrieve a single parsing tuple by its production name
'''
try:
return self.tupleset[ self.nameset.index( name ) ]
except ValueError:
print '''Could not find parser tuple of name''', name
return ()
def allparsers (self):
'''
Return a list of (productionname, parsingtuple) values
suitable for passing to another generator as its pre-calculated
set of parsing tuples. (See method build)
'''
returnvalue = []
for i in range(len( self.nameset)):
returnvalue.append ( (self.nameset[i],self.tupleset[i]) )
return returnvalue
### Actual processing functions...
def element_token( self, eltup, genobj, reportname=None ):
# Determine the type of element
# Descry the various options for the element
negative = optional = repeating = element = None
for data in eltup[3]:
if data[0] == 'negpos_indicator':
if genobj.stringval ( data ) == '-':
negative = 1
elif data[0] == 'occurence_indicator':
data = genobj.stringval ( data )
if data == '*':
optional = 1
repeating = 1
elif data == '+':
repeating = 1
elif data == '?':
optional = 1
else:
err( 'Unknown occurence indicator '+ data )
else:
element = data
# call the appropriate handler
try:
return getattr( self, element [0])( element, genobj, negative, repeating, optional)
except AttributeError,x:
err( '''Didn't find handler for element type %s, parser build aborted'''%element [0])
raise x
def group( self, els, genobj, negative= None, repeating=None, optional = None, reportname=None):
'''
Determine what type of group we're dealing with and determine what
function to call, then call it.
'''
groupset = els[3]
# groupset is an element_token followed by a possible added_token
if groupset:
els = []
els.append( groupset[0] )
if len(groupset) > 1:
els[len(els):] = groupset[1][3]
gtype = groupset[1][0]
if gtype == 'seq_added_token':
return self.seq( els, genobj, negative, repeating, optional, reportname )
elif gtype == 'fo_added_token':
return self.fo( els, genobj, negative, repeating, optional, reportname )
else:
err( '''An as-yet undefined group type was used! %s'''%gtype )
else: # default "sequence" of one... could do more work and make it process the results specifically, but that's optimisation ;)
return self.seq( els, genobj, negative, repeating, optional, None )
else:
return []
def seq( self, els, genobj, negative= None, repeating=None, optional = None, reportname=None ):
elset = map( self.element_token, els, [genobj]*len( els) )
elset = reduce( operator.add, elset )
if negative:
if repeating:
if optional:
return [(None, SubTable, (( None, SubTable,( (None, SubTable, tuple( elset), 2,1), (None, Fail, Here),(None,Skip,1) ), 2,1 ), ( None, EOF, Here, -1,1 ), ), ), ]
else: # not optional
return [(None, SubTable, (( None, SubTable,( (None, SubTable, tuple( elset), 2,1), (None, Fail, Here),(None,Skip,1) )), ( None, SubTable,( (None, SubTable, tuple( elset), 2,1), (None, Fail, Here),(None,Skip,1) ), 2,1 ), ( None, EOF, Here, -1,1 ), ), ), ]
else: # single
if optional:
return [ (None, SubTable, ( (None, SubTable, tuple( elset), 2,1), (None, Fail, Here), (None, Skip, 1) ),1,1) ]
else: # not optional
return [ (None, SubTable, ( (None, SubTable, tuple( elset), 2,1), (None, Fail, Here), (None, Skip, 1) )) ]
else: # positive
if repeating:
if optional:
return [ (None, SubTable, tuple( elset), 1,0) ]
else: # not optional
return [ (None, SubTable, tuple( elset)), (None, SubTable, tuple( elset), 1,0) ]
else: # single
if optional:
return [ (None, SubTable, tuple( elset), 1,1) ]
else: # not optional
return [ (None, SubTable, tuple( elset)) ]
def fo( self, els, genobj, negative= None, repeating=None, optional = None, reportname=None ):
elset = map( self.element_token, els, [genobj]*len( els) )
elset = reduce( operator.add, elset )
elset = []
for el in els:
dataset = self.element_token( el, genobj )
if len( dataset) == 1 and len(dataset[0]) == 3: # we can alter the jump states with impunity
elset.append( dataset[0] )
else: # for now I'm eating the inefficiency and doing an extra SubTable for all elements to allow for easy calculation of jumps within the FO group
elset.append( (None, SubTable, tuple( dataset )) )
if negative:
# all negative FO's have the meaning "a positive, single, non-optional FO not matching"
# the flags modify how failure and continuation are handled in that case, so they can use
# the same procset.
# Note: Negative FO groups are _very_ heavy, they have normally about 4 subtable calls
# guess we'll find out how well mxTextTools handles recursive tables :)
procset = []
for i in range( len( elset) -1): # note that we have to treat last el specially
ival = elset[i] + (1,len(elset)-i)
procset.append( ival ) # if success, jump past end
procset.append( elset[-1] + (2,1) ) # will cause a failure if last element doesn't match
procset.append( (None, Fail, Here ) )
procset.append( (None, Skip, 1) )
# if the following looks familiar you probably looked at seq above
if repeating:
if optional:
return [ (None, SubTable, ( (None, SubTable, tuple( procset), 2,1), (None, EOF, Here,-1,1) ) ) ]
else: # not optional
return [ (None, SubTable, ( (None, SubTable, tuple( procset)),(None, SubTable, tuple( procset), 2,1), (None, EOF, Here,-1,1) ) ) ]
else: # single
if optional:
return [ (None, SubTable, tuple( procset), 1,1) ]
else: # not optional
return [ (None, SubTable, tuple( procset) ) ]
else: # positive
if repeating:
if optional:
procset = []
for i in range( len( elset)):
procset.append( elset[i] + (1,-i) ) # if success, go back to start which is -i elements back
return procset
else: # not optional
procset = []
for i in range( len( elset)-1):
procset.append( elset[i] + (1, len(elset)-i+1) ) # if success, jump to later section
procset.append( elset[-1] + ( 1, 2) ) # will cause a failure if last element doesn't match using an explicit fail command
procset.append( (None, Fail, Here) ) # will cause a failure if last element doesn't match using an explicit fail command
for i in range( len( elset)-1):
procset.append( elset[i] + (1, -i) ) # if success, go back to start which is -i elements back
procset.append( elset[-1] + ( 1, 1-(len(elset)) ) ) # will cause a failure if last element doesn't match using an explicit fail command
return procset
else: # single
if optional:
procset = []
for i in range( len( elset)):
procset.append( elset[i] + (1,len(elset)-i) ) # if success, jump past end
return procset
else: # not optional
procset = []
for i in range( len( elset) -1): # note that we have to treat last el specially
procset.append( elset[i] + (1,len(elset)-i) ) # if success, jump past end
procset.append( elset[-1] ) # will cause a failure if last element doesn't match
return procset
def name( self, value, genobj, negative = None, repeating = None, optional = None, reportname=None ):
svalue = genobj.stringval( value )
try:
sindex = genobj.nameset.index( svalue )
except ValueError: # eeps, a value not declared
try:
sindex = genobj.nameset.index( '<'+svalue+'>' )
svalue = None
except ValueError:
err( '''The name %s could not be found in the declarationset. The parser will not compile.'''%svalue)
genobj.nameset.append( svalue )
genobj.tupleset.append( None )
sindex = len( genobj.nameset) - 1
if negative:
if repeating:
if optional:
return [ (svalue, SubTable, ( (None, TableInList, (genobj.tupleset, sindex), 1,3), (None, EOF, Here,1,2), (None,Skip,1,-2,-2) ) ) ]
else: # not optional
return [ (svalue, SubTable, ( (None, TableInList, (genobj.tupleset, sindex),2,1),(None, Fail, Here),(None, Skip, 1), (None, TableInList, (genobj.tupleset, sindex), 1,3), (None, EOF, Here,1,2), (None,Skip,1,-2,-2) ) ) ]
else: # single
if optional:
return [ (None, SubTable, ( (None, TableInList, (genobj.tupleset, sindex),2,1),(None, Fail, Here),(svalue, Skip, 1) ),1,1) ]
else: # not optional
return [ (None, SubTable, ( (None, TableInList, (genobj.tupleset, sindex),2,1),(None, Fail, Here),(svalue, Skip, 1) )) ]
else: # positive
if repeating:
if optional:
return [ (svalue, TableInList, (genobj.tupleset, sindex), 1,0) ]
else: # not optional
return [ (svalue, TableInList, (genobj.tupleset, sindex)), (svalue, TableInList, (genobj.tupleset, sindex),1,0) ]
else: # single
if optional:
return [ (svalue, TableInList, (genobj.tupleset, sindex), 1,1) ]
else: # not optional
return [ (svalue, TableInList, (genobj.tupleset, sindex)) ]
specialescapedmap = {
'a':'\a',
'b':'\b',
'f':'\f',
'n':'\n',
'r':'\r',
't':'\t',
'v':'\v',
'\\':'\\',
'"':'"',
"'":"'",
}
def escapedchar( self, el, genobj ):
svalue = ''
if el[3][0][0] == 'SPECIALESCAPEDCHAR':
svalue = svalue + self.specialescapedmap[ genobj.stringval( el[3][0] ) ]
elif el[3][0][0] == 'OCTALESCAPEDCHAR':
#print 'OCTALESCAPEDCHAR', genobj.stringval( el)
ovnum = 0
ovpow = 0
ov = genobj.stringval( el[3][0] )
while ov:
ovnum = ovnum + int( ov[-1] ) * (8**ovpow)
ovpow = ovpow + 1
ov = ov[:-1]
svalue = svalue + chr( ovnum )
#print 'svalue ', `svalue`
return svalue
def literal( self, value, genobj, negative = None, repeating=None, optional=None, reportname=None ):
'''
Calculate the tag-table for a literal element token
'''
svalue = ''
for el in value[3]:
if el[0] in ('CHARNOSNGLQUOTE', 'CHARNODBLQUOTE'):
svalue = svalue+genobj.stringval( el )
elif el[0] == 'ESCAPEDCHAR':
svalue = svalue + self.escapedchar( el, genobj )
#print 'literal value', `genobj.stringval( value )`
#print ' svalue', `svalue`
# svalue = svalue[1:-1]
if negative:
if repeating: # a repeating negative value, a "search" in effect
if optional: # if fails, then go to end of file
return [ (None, sWordStart, BMS( svalue ),1,2), (None, Move, ToEOF ) ]
else: # must first check to make sure the current position is not the word, then the same
return [ (None, Word, svalue, 2,1),(None, Fail, Here),(None, sWordStart, BMS( svalue ),1,2), (None, Move, ToEOF ) ]
#return [ (None, Word, svalue, 2,1),(None, Fail, Here),(None, WordStart, svalue,1,2), (None, Move, ToEOF ) ]
else: # a single-character test saying "not a this"
if optional: # test for a success, move back if success, move one forward if failure
if len(svalue) > 1:
return [ (None, Word, svalue, 2,1),
(None, Skip, -len(svalue), 2,2), # backup if this was the word to start of word, succeed
(None, Skip, 1 ) ] # else just move one character and succeed
else: # Uses Is test instead of Word test, should be faster I'd imagine
return [ (None, Is, svalue, 2,1),
(None, Skip, -1, 2,2), # backtrack
(None, Skip, 1 ) ] # else just move one character and succeed
else: # must find at least one character not part of the word, so
if len(svalue) > 1:
return [ (None, Word, svalue, 2,1),
(None, Fail, Here),
(None, Skip, 1 ) ] # else just move one character and succeed
else: #must fail if it finds or move one forward
return [ (None, Is, svalue, 2,1),
(None, Fail, Here),
(None, Skip, 1 ) ] # else just move one character and succeed
else: # positive
if repeating:
if optional:
if len(svalue) > 1:
return [ (None, Word, svalue, 1,0) ]
else:
return [ (None, Is, svalue, 1,0) ]
else: # not optional
if len(svalue) > 1:
return [ (None, Word, svalue),(None, Word, svalue,1,0) ]
else:
return [ (None, Is, svalue),(None, Is, svalue,1,0) ]
else: # not repeating
if optional:
if len(svalue) > 1:
return [ (None, Word, svalue, 1,1) ]
else:
return [ (None, Is, svalue, 1,1) ]
else: # not optional
if len(svalue) > 1:
return [ (None, Word, svalue) ]
else:
return [ (None, Word, svalue) ]
def charnobrace( self, cval, genobj ):
#print 'cval', cval
if cval[3][0][0] == 'ESCAPEDCHAR':
return self.escapedchar( cval[3][0], genobj )
#print '''Straight non-brace character''', `genobj.stringval( cval[3][0] )`
return genobj.stringval( cval )
def range( self, value, genobj, negative = None, repeating=None, optional=None, reportname=None ):
dataset = []
for cval in value[3]:
if cval[0] == 'CHARBRACE':
dataset.append( ']')
elif cval[0] == 'CHARDASH':
dataset.append( '-')
elif cval[0] == 'CHARNOBRACE':
dataset.append( self.charnobrace( cval, genobj ) )
elif cval[0] == 'CHARRANGE':
start = ord( self.charnobrace( cval[3][0], genobj ) )
end = ord( self.charnobrace( cval[3][1], genobj ) )
if start < end:
dataset.append( string.join( map( chr, range( start, end +1 ) ), '' ) )
else:
dataset.append( string.join( map( chr, range( end, start +1 ) ), '' ) )
else:
dataset.append( genobj.stringval( cval ) )
if negative:
#svalue = set( string.join( dataset, '' ), 0 )
svalue = string.join( dataset, '' )
else:
#svalue = set( string.join( dataset, '' ), 1)
svalue = string.join( dataset, '' )
if negative:
if repeating:
if optional:
#return [ (None, AllInSet, svalue, 1 ) ]
return [ (None, AllNotIn, svalue, 1 ) ]
else: # not optional
#return [ (None, AllInSet, svalue ) ]
return [ (None, AllNotIn, svalue ) ]
else: # not repeating
if optional:
#return [ (None, IsInSet, svalue, 1 ) ]
return [ (None, IsNotIn, svalue, 1 ) ]
else: # not optional
#return [ (None, IsInSet, svalue ) ]
return [ (None, IsNotIn, svalue ) ]
else:
if repeating:
if optional:
#return [ (None, AllInSet, svalue, 1 ) ]
return [ (None, AllIn, svalue, 1 ) ]
else: # not optional
#return [ (None, AllInSet, svalue ) ]
return [ (None, AllIn, svalue ) ]
else: # not repeating
if optional:
#return [ (None, IsInSet, svalue, 1 ) ]
return [ (None, IsIn, svalue, 1 ) ]
else: # not optional
#return [ (None, IsInSet, svalue ) ]
return [ (None, IsIn, svalue ) ]
class Generator( _BaseGenerator ):
def __init__( self, syntaxstring , parser ):
self.syntaxstring = syntaxstring
self.parsetree = [0,1,2, tag( syntaxstring, parser )[1] ]
self.nameset = []
self.tupleset = []
def buildParser( declaration, prebuiltnodes=() ):
'''
End-developer function to create an application-specific parser
the parsing tuple is available on the returned object as
object.parserbyname( 'declaredname' ), where declaredname is the
name you defined in your language defintion file.
The declaration argument is the text of a language defintion file.
'''
proc = _BaseGenerator( )
proc.build()
newgen = Generator( declaration, proc.parserbyname( 'declarationset' ) )
newgen.build( prebuiltnodes=prebuiltnodes )
return newgen
|