Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/python/modules/TextTools/Constants/TagTables.py')
-rw-r--r--intern/python/modules/TextTools/Constants/TagTables.py348
1 files changed, 348 insertions, 0 deletions
diff --git a/intern/python/modules/TextTools/Constants/TagTables.py b/intern/python/modules/TextTools/Constants/TagTables.py
new file mode 100644
index 00000000000..315d825b94e
--- /dev/null
+++ b/intern/python/modules/TextTools/Constants/TagTables.py
@@ -0,0 +1,348 @@
+""" Constants for writing tag tables
+
+ The documentation in this file is obsoleted by the HTML docs in
+ the Doc/ subdirectory of the package. Constants defined here must
+ match those in mxTextTools/mxte.h.
+
+ (c) Copyright Marc-Andre Lemburg; All Rights Reserved.
+ See the documentation for further information on copyrights,
+ or contact the author (mal@lemburg.com).
+"""
+#########################################################################
+# This file contains the definitions and constants used by the tagging
+# engine:
+#
+# 1. Matching Tables
+# 2. Commands & Constants
+# 3. Matching Functions
+# 4. Callable tagobjects
+# 5. Calling the engine & Taglists
+#
+
+#########################################################################
+# 1. Matching Tables:
+#
+# these are tuples of tuples, each entry having the following meaning:
+#
+# syntax: (tag, cmd, chars|table|fct [,jne] [,je=1])
+# tag = object used to mark this section, if it matches
+# cmd = command (see below)
+# chars = match one or more of these characters
+# table = table to use for matching characters
+# fct = function to call (see below)
+# jne = if the current character doesn't match, jump this
+# many table entries relative to the current entry
+# je = if we have a match make a relative jump of this length
+#
+# * a table matches a string iff the end of the table is reached
+# (that is: an index is requested that is beyond the end-of-table)
+# * a table is not matched if a tag is not matched and no jne is given;
+# if it is matched then processing simply moves on to the next entry
+# * marking is done by adding the matching slice in the string
+# together with the marking object to the tag list; if the object is
+# None, then nothing will be appended to the taglist
+# * if the flag CallTag is set in cmd, then instead of appending
+# matches to the taglist, the tagobj will be called (see below)
+#
+# TIP: if you are getting an error 'call of a non-function' while
+# writing a table definition, you probably have a missing ','
+# somewhere in the tuple !
+#
+# For examples see the tag*.py - files that came with this engine.
+#
+
+#########################################################################
+# 2. Commands & Constants
+#
+#
+
+#
+# some useful constants for writing matching tables
+#
+
+To = None # good for cmd=Jump
+Here = None # good for cmd=Fail and EOF
+MatchOk = 20000 # somewhere beyond the end of the tag table...
+MatchFail = -20000 # somewhere beyond the start of the tag table...
+ToEOF = -1 # good for cmd=Move
+
+ThisTable = 999 # to recursively match using the current table;
+ # can be passed as argument to Table and SubTable
+ # instead of a tuple
+
+#
+# commands and flags passed in cmd (see below)
+#
+# note: I might add some further commands to this list, if needed
+# (the numbers will then probably change, but not the
+# names)
+#
+# convention: a command "matches", if and only if it moves the
+# current position at least one character; a command "reads"
+# characters if they match ok
+#
+# notations:
+#
+# x refers to the current position in the string
+# len refers to the string length or what the function tag() is told to
+# believe it to be (i.e. the engine only looks at the slice text[x:len])
+# text refers to the text string
+# jne is the optional relative jump distance in case the command
+# did not match, i.e. x before and after applying the command
+# are the same (if not given the current table is considered
+# not to match)
+# je is the optional relative jump distance in case the command
+# did match (it defaults to +1)
+#
+
+# commands
+Fail = 0 # this will always fail (position remains unchanged)
+Jump = 0 # jump to jne (position remains unchanged)
+
+# match & read chars
+AllIn = 11 # all chars in match (at least one)
+AllNotIn = 12 # all chars not in match (at least one)
+Is = 13 # current char must be == match (matches one char)
+IsIn = 14 # current char must be in match (matches one char)
+IsNot = 15 # current char must be != match (matches one char)
+IsNotIn = 15 # current char must not be in match (matches one char)
+
+AllInSet = 31
+IsInSet = 32
+
+# match & read for whole words
+Word = 21 # the next chars must be those in match
+WordStart = 22 # all chars up to the first occ. of match (at least one)
+WordEnd = 23 # same as WordStart, except that the text pointer
+ # is moved behind the match
+NoWord = WordStart # all chars up to the first occ. of match (at least one)
+
+
+# match using search objects BMS or FS
+sWordStart = 111 # all chars up to the first occ. of match (may be 0 chars)
+sWordEnd = 112 # same as WordStart, except that the text pointer
+ # is moved behind the match
+sFindWord = 113 # find match and process the found slice only (ignoring
+ # the chars that lead up to the match); positions
+ # the text pointer right after the match like WordEnd
+
+# functions & tables
+Call = 201 # call match(text,x,len) as function (see above)
+CallArg = 202 # match has to be a 2-tuple (fct,arg), then
+ # fct(text,x,len,arg) is called; the return value is taken
+ # as new x; it is considered matching if the new x is
+ # different than the x before the call -- like always
+ # (note: arg has to be *one* object, e.g. a tuple)
+Table = 203 # match using table (given in match)
+SubTable = 207 # match using sub table (given in match); the sub table
+ # uses the same taglist as the calling table
+TableInList = 204 # same as Table, but match is a tuple (list,index)
+ # and the table list[index] is used as matching
+ # table
+SubTableInList = 208
+ # same as TableInList, but the sub table
+ # uses the same taglist as the calling table
+
+# specials
+EOF = 1 # current position must be EOF, e.g. >= len(string)
+Skip = 2 # skip match (must be an integer) chars; note: this cmd
+ # always matches ok, so jne doesn't have any meaning in
+ # this context
+Move = 3 # move the current text position to match (if negative,
+ # the text length + 1 (!) is added, thus -1 moves to the
+ # EOF, -2 to the last char and so on); note: this cmd
+ # always matches ok, so jne doesn't have any meaning in
+ # this context
+
+# loops
+Loop = 205 # loop-construct
+ #
+ # (tagobj,Loop,Count,jne,je) - sets/decrements the
+ # loop variable for current table according to the
+ # following rules:
+ # 1. the first time the engine passes this entry
+ # it sets the loop variable to Count and continues
+ # without reading any character, but saving the
+ # current position in text
+ # 2. the next time, it decrements the loop variable
+ # and checks if it is < 0:
+ # (a) if it is, then the tagobj is added to the
+ # taglist with the slice (saved position, current
+ # position) and processing continues at entry
+ # current + jne
+ # (b) else, processing continues at entry current + je
+ # Note: if you jump out of the loop while the loop
+ # variable is still > 0, then you *must*
+ # reset the loop mechanism with
+ # (None,LoopControl,Reset)
+ # Note: you can skip the remaining loops by calling
+ # (None,LoopControl,Break) and jumping back
+ # to the Loop-entry; this sets the loop
+ # variable to 0
+ # Note: tables cannot have nested loops within their
+ # context; you can have nested loops in nested
+ # tables though (there is one loop var per
+ # tag()-call which takes place every time
+ # a table match is done)
+ #
+LoopControl = 206 # controls the loop variable (always succeeds, i.e.
+ # jne has no meaning);
+ # match may be one of:
+Break = 0 # * sets the loop variable to 0, thereby allowing
+ # to skip the remaining loops
+Reset = -1 # * resets the loop mechanism (see note above)
+ #
+ # See tagLoop.py for some examples.
+
+##########################################################################
+#
+# Flags (to be '+'ed with the above command code)
+#
+CallTag = 256 # call tagobj(taglist,text,l,r,subtags)
+ # upon successfully matching the slice [l:r] in text
+ # * taglist is the current list of tags found (may be None)
+ # * subtags is a sub-list, passed when a subtable was used
+ # to do the matching -- it is None otherwise !)
+#
+# example entry with CallTag-flag set:
+#
+# (found_a_tag,CallTag+Table,tagtable)
+# -- if tagtable matches the current text position,
+# found_a_tag(taglist,text,l,r,newtaglist) is called and
+# the match is *not* appended to the taglist by the tagging
+# engine (the function would have to do this, in case it is needed)
+
+AppendToTagobj = 512 # this appends the slice found to the tagobj, assuming
+ # that it is a Python list:
+ # does a tagobj.append((None,l,r,subtags)) call
+# Alias for b/w comp.
+AppendToTag = AppendToTagobj
+
+AppendTagobj = 1024 # don't append (tagobj,l,r,subtags) to the taglist,
+ # but only tagobj itself; the information in l,r,subtags
+ # is lost, yet this can be used to write tag tables
+ # whose output can be used directly by tag.join()
+
+AppendMatch = 2048 # append the match to the taglist instead of
+ # the tag object; this produces non-standard
+ # taglists !
+
+#########################################################################
+# 3. Matching Functions
+#
+# syntax:
+#
+# fct(s,x,len_s)
+# where s = string we are working on
+# x = current index in s where we want to match something
+# len_s = 'length' of s, this is how far the search may be
+# conducted in s, not necessarily the true length of s
+#
+# * the function has to return the index of the char right after
+# the matched string, e.g.
+#
+# 'xyzabc' ---> 'xyz' matches ---> return x+3
+#
+# * if the string doesn't match simply return x; in other words:
+# the function has to return the matching slice's right index
+# * you can use this to match e.g. 10 characters of a certain kind,
+# or any word out of a given list, etc.
+# * note: you cannot give the function additional parameters from within
+# the matching table, so it has to know everything it needs to
+# know a priori; use dynamic programming !
+#
+# some examples (not needed, since all are implemented by commands)
+#
+#
+#def matchword(x):
+# s = """
+#def a(s,x,len_text):
+# y = x+%i
+# if s[x:y] == %s: return y
+# return x
+#"""
+# exec s % (len(x),repr(x))
+# return a
+#
+#def rejectword(x):
+# s = """
+#def a(s,x,len_text):
+# while x < len(s) and s[x:x+%i] != %s:
+# x = x + 1
+# return x
+#"""
+# exec s % (len(x),repr(x))
+# return a
+#
+#def HTML_Comment(s,x,len_text):
+# while x < len_text and s[x:x+3] != '-->':
+# x = x + 1
+# return x
+#
+#
+
+#########################################################################
+# 4. Callable tagobjects
+#
+# a sample callable tagobj:
+#
+#
+#def test(taglist,text,l,r,newtaglist):
+#
+# print 'found',repr(text[l:r])[:40],(l,r)
+#
+#
+
+#########################################################################
+# 5. Calling the engine & Taglists
+#
+# The function
+# tag(text,table,start=0,len_text=len(text),taglistinit=[])
+# found in mxTextTools:
+#
+# This function does all the matching according to the above rules.
+# You give it a text string and a tag table and it will
+# start processing the string starting from 'start' (which defaults to 0)
+# and continue working until it reaches the 'EOF', i.e. len_text (which
+# defaults to the text length). It thus tags the slice text[start:len_text].
+#
+# The function will create a list of found tags in the following
+# format (which I call taglist):
+#
+# (tagobj,l,r,subtaglist)
+#
+# where: tagobj = specified tag object taken from the table
+# [l:r] = slice that matched the tag in text
+# subtaglist = if matching was done using a subtable
+# this is the taglist it produced; in all other
+# cases this will be None
+#
+# * if you pass None as taglistinit, then no taglist will be created,
+# i.e. only CallTag commands will have any effect. (This saves
+# temporary memory for big files)
+# * the function returns a tuple:
+# (success, taglist, nextindex)
+# where: success = 0/1
+# taglist = the produced list or None
+# nextindex = the index+1 of the last char that matched
+# (in case of failure, this points to the beginning
+# of the substring that caused the problem)
+#
+
+### Module init.
+
def _module_init():
    """Build the module-level ``id2cmd`` reverse lookup table.

    Scans this module's global namespace and maps every plain integer
    value back to the name it is bound to, storing the result in the
    global ``id2cmd`` dictionary (value -> name).

    * The value 0 is always reported as ``'Fail/Jump'``, regardless of
      which names are bound to 0.
    * When several names share the same non-zero value, the name seen
      last during iteration wins, because later entries overwrite
      earlier ones.
    * Only objects whose type is exactly ``int`` are considered, so
      ``bool`` values and non-integer globals are ignored.
    """
    global id2cmd

    table = {}
    # Iterate over a snapshot so the loop cannot be disturbed by changes
    # to the module namespace (on Python 3, items() is a live view).
    for name, value in list(globals().items()):
        # Exact type test instead of types.IntType: that alias exists
        # only on Python 2, whereas ``int`` works on Python 2 and 3 and
        # still keeps bool (an int subclass) out of the table.
        if type(value) is not int:
            continue
        if value == 0:
            table[0] = 'Fail/Jump'
        else:
            table[value] = name
    id2cmd = table
+
+_module_init()