Package pyparsing ::
Module pyparsing
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 __doc__ = \
27 """
28 pyparsing module - Classes and methods to define and execute parsing grammars
29
30 The pyparsing module is an alternative approach to creating and executing simple grammars,
31 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
32 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
33 provides a library of classes that you use to construct the grammar directly in Python.
34
35 Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
36
37 from pyparsing import Word, alphas
38
39 # define grammar of a greeting
40 greet = Word( alphas ) + "," + Word( alphas ) + "!"
41
42 hello = "Hello, World!"
43 print hello, "->", greet.parseString( hello )
44
45 The program outputs the following::
46
47 Hello, World! -> ['Hello', ',', 'World', '!']
48
49 The Python representation of the grammar is quite readable, owing to the self-explanatory
50 class names, and the use of '+', '|' and '^' operators.
51
52 The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
53 object with named attributes.
54
55 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
56 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
57 - quoted strings
58 - embedded comments
59 """
60
61 __version__ = "1.5.3"
62 __versionTime__ = "14 May 2010 22:21"
63 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
64
65 import string
66 from weakref import ref as wkref
67 import copy
68 import sys
69 import warnings
70 import re
71 import sre_constants
72
73
74 __all__ = [
75 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
76 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
77 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
78 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
79 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
80 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
81 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
82 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
83 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
84 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
85 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
86 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
87 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
88 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
89 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
90 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
91 'indentedBlock', 'originalTextFor',
92 ]
93
94 """
95 Detect if we are running version 3.X and make appropriate changes
96 Robert A. Clark
97 """
98 _PY3K = sys.version_info[0] > 2
99 if _PY3K:
100 _MAX_INT = sys.maxsize
101 basestring = str
102 unichr = chr
103 _ustr = str
104 alphas = string.ascii_lowercase + string.ascii_uppercase
105 else:
106 _MAX_INT = sys.maxint
107 range = xrange
108 set = lambda s : dict( [(c,0) for c in s] )
109 alphas = string.lowercase + string.uppercase
110
112 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
113 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
114 then < returns the unicode object | encodes it with the default encoding | ... >.
115 """
116 if isinstance(obj,unicode):
117 return obj
118
119 try:
120
121
122 return str(obj)
123
124 except UnicodeEncodeError:
125
126
127
128
129
130 return unicode(obj)
131
132
133
134
135
136
137
138
140 """Escape &, <, >, ", ', etc. in a string of data."""
141
142
143 from_symbols = '&><"\''
144 to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()]
145 for from_,to_ in zip(from_symbols, to_symbols):
146 data = data.replace(from_, to_)
147 return data
148
151
# Character-set string constants; nums, hexnums, alphanums and printables are
# listed in __all__.
nums = string.digits
# decimal digits plus the hexadecimal letters in both cases
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
# chr(92) is the backslash character
_bslash = chr(92)
# every character of string.printable that is not whitespace
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
157
159 """base exception class for all parsing runtime exceptions"""
160
161
def __init__( self, pstr, loc=0, msg=None, elem=None ):
    """Record where and why a parse failed.

    pstr  - the string being parsed; when msg is omitted, this argument
            is taken to be the message instead and the stored string is ""
    loc   - character offset associated with the failure (default 0)
    msg   - description of the failure, or None for the one-argument form
    elem  - the element that raised the exception, stored as parserElement
    """
    # One-argument form: the sole positional value is really the message.
    if msg is None:
        msg, pstr = pstr, ""
    self.loc = loc
    self.msg = msg
    self.pstr = pstr
    self.parserElement = elem
171
173 """supported attributes by name are:
174 - lineno - returns the line number of the exception text
175 - col - returns the column number of the exception text
176 - line - returns the line containing the exception text
177 """
178 if( aname == "lineno" ):
179 return lineno( self.loc, self.pstr )
180 elif( aname in ("col", "column") ):
181 return col( self.loc, self.pstr )
182 elif( aname == "line" ):
183 return line( self.loc, self.pstr )
184 else:
185 raise AttributeError(aname)
186
188 return "%s (at char %d), (line:%d, col:%d)" % \
189 ( self.msg, self.loc, self.lineno, self.column )
203 return "loc msg pstr parserElement lineno col line " \
204 "markInputLine __str__ __repr__".split()
205
207 """exception thrown when parse expressions don't match class;
208 supported attributes by name are:
209 - lineno - returns the line number of the exception text
210 - col - returns the column number of the exception text
211 - line - returns the line containing the exception text
212 """
213 pass
214
216 """user-throwable exception thrown when inconsistent parse content
217 is found; stops all parsing immediately"""
218 pass
219
221 """just like ParseFatalException, but thrown internally when an
222 ErrorStop indicates that parsing is to stop immediately because
223 an unbacktrackable syntax error has been found"""
227
228
229
230
231
232
233
234
235
236
237
238
239
240
242 """exception thrown by validate() if the grammar could be improperly recursive"""
def __init__( self, parseElementList ):
    """Store parseElementList as self.parseElementTrace."""
    self.parseElementTrace = parseElementList
245
247 return "RecursiveGrammarException: %s" % self.parseElementTrace
248
255 return repr(self.tup)
257 self.tup = (self.tup[0],i)
258
260 """Structured parse results, to provide multiple means of access to the parsed data:
261 - as a list (len(results))
262 - by list index (results[0], results[1], etc.)
263 - by attribute (results.<resultsName>)
264 """
265
def __new__(cls, toklist, name=None, asList=True, modal=True ):
    """Return toklist itself when it is already an instance of cls;
    otherwise allocate a new object flagged as still needing
    initialization.

    name, asList and modal are accepted but not used here.
    """
    if not isinstance(toklist, cls):
        fresh = object.__new__(cls)
        # tells __init__ that the basic fields have not been set up yet
        fresh.__doinit = True
        return fresh
    # already an instance of cls: reuse it instead of wrapping it again
    return toklist
272
273
274
def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ):
    """Set up the token list and, when a name is given, file the tokens
    under that results name.

    toklist - a list (copied), or any other value (wrapped in a
              one-element list)
    name    - results name; ignored when None or otherwise false
    asList  - when true the named entry is stored as a nested
              ParseResults, otherwise as the first token
    modal   - when false the name is recorded in the accumulating-names
              table
    """
    if self.__doinit:
        # first-time setup; skipped when __new__ handed back an existing object
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        if isinstance(toklist, list):
            self.__toklist = toklist[:]
        else:
            self.__toklist = [toklist]
        self.__tokdict = dict()

    # nothing more to do without a usable results name
    if not name:
        return

    if not modal:
        self.__accumNames[name] = 0
    if isinstance(name,int):
        name = _ustr(name)
    self.__name = name

    # empty input: keep the name but register no named entry
    if toklist in (None,'',[]):
        return
    if isinstance(toklist,basestring):
        toklist = [ toklist ]

    if not asList:
        try:
            self[name] = toklist[0]
        except (KeyError,TypeError,IndexError):
            # not indexable (or empty): store the value itself
            self[name] = toklist
        return

    if isinstance(toklist,ParseResults):
        entry = _ParseResultsWithOffset(toklist.copy(),0)
    else:
        entry = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
    self[name] = entry
    self[name].__name = name
307
309 if isinstance( i, (int,slice) ):
310 return self.__toklist[i]
311 else:
312 if i not in self.__accumNames:
313 return self.__tokdict[i][-1][0]
314 else:
315 return ParseResults([ v[0] for v in self.__tokdict[i] ])
316
318 if isinstance(v,_ParseResultsWithOffset):
319 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
320 sub = v[0]
321 elif isinstance(k,int):
322 self.__toklist[k] = v
323 sub = v
324 else:
325 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
326 sub = v
327 if isinstance(sub,ParseResults):
328 sub.__parent = wkref(self)
329
331 if isinstance(i,(int,slice)):
332 mylen = len( self.__toklist )
333 del self.__toklist[i]
334
335
336 if isinstance(i, int):
337 if i < 0:
338 i += mylen
339 i = slice(i, i+1)
340
341 removed = list(range(*i.indices(mylen)))
342 removed.reverse()
343
344 for name in self.__tokdict:
345 occurrences = self.__tokdict[name]
346 for j in removed:
347 for k, (value, position) in enumerate(occurrences):
348 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
349 else:
350 del self.__tokdict[i]
351
353 return k in self.__tokdict
354
def __len__( self ):
    """Number of tokens in the token list."""
    return len( self.__toklist )

def __bool__(self):
    """True when the token list holds at least one token."""
    count = len( self.__toklist )
    return count > 0

# alias for the same truth test under the __nonzero__ name
__nonzero__ = __bool__

def __iter__( self ):
    """Iterate over the tokens in order."""
    return iter( self.__toklist )

def __reversed__( self ):
    """Iterate over a reversed copy of the token list."""
    backwards = self.__toklist[::-1]
    return iter( backwards )
361 """Returns all named result keys."""
362 return self.__tokdict.keys()
363
def pop( self, index=-1 ):
    """Remove the item at the given index (default: the last one) and
    return it. The index is passed straight to item lookup and item
    deletion, so both numeric indices and dict-key indices work."""
    removed = self[index]
    del self[index]
    return removed
370
def get(self, key, defaultValue=None):
    """Look up the named result for key. When key is not present,
    return defaultValue instead (None unless the caller supplies
    one)."""
    if key not in self:
        return defaultValue
    return self[key]
379
def insert( self, index, insStr ):
    """Insert insStr into the token list at index, then renumber the
    positions recorded for named results so that entries located after
    the insertion point move up by one."""
    self.__toklist.insert(index, insStr)

    for entries in self.__tokdict.values():
        for slot, (value, position) in enumerate(entries):
            # NOTE(review): the comparison is strict, so an entry recorded
            # exactly at `index` keeps its old position - confirm intended.
            if position > index:
                position += 1
            entries[slot] = _ParseResultsWithOffset(value, position)
387
389 """Returns all named result keys and values as a list of tuples."""
390 return [(k,self[k]) for k in self.__tokdict]
391
393 """Returns all named result values."""
394 return [ v[-1][0] for v in self.__tokdict.values() ]
395
397 if True:
398 if name in self.__tokdict:
399 if name not in self.__accumNames:
400 return self.__tokdict[name][-1][0]
401 else:
402 return ParseResults([ v[0] for v in self.__tokdict[name] ])
403 else:
404 return ""
405 return None
406
408 ret = self.copy()
409 ret += other
410 return ret
411
413 if other.__tokdict:
414 offset = len(self.__toklist)
415 addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
416 otheritems = other.__tokdict.items()
417 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
418 for (k,vlist) in otheritems for v in vlist]
419 for k,v in otherdictitems:
420 self[k] = v
421 if isinstance(v[0],ParseResults):
422 v[0].__parent = wkref(self)
423
424 self.__toklist += other.__toklist
425 self.__accumNames.update( other.__accumNames )
426 return self
427
429 if isinstance(other,int) and other == 0:
430 return self.copy()
431
433 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
434
436 out = "["
437 sep = ""
438 for i in self.__toklist:
439 if isinstance(i, ParseResults):
440 out += sep + _ustr(i)
441 else:
442 out += sep + repr(i)
443 sep = ", "
444 out += "]"
445 return out
446
448 out = []
449 for item in self.__toklist:
450 if out and sep:
451 out.append(sep)
452 if isinstance( item, ParseResults ):
453 out += item._asStringList()
454 else:
455 out.append( _ustr(item) )
456 return out
457
459 """Returns the parse results as a nested list of matching tokens, all converted to strings."""
460 out = []
461 for res in self.__toklist:
462 if isinstance(res,ParseResults):
463 out.append( res.asList() )
464 else:
465 out.append( res )
466 return out
467
469 """Returns the named parse results as dictionary."""
470 return dict( self.items() )
471
473 """Returns a new copy of a ParseResults object."""
474 ret = ParseResults( self.__toklist )
475 ret.__tokdict = self.__tokdict.copy()
476 ret.__parent = self.__parent
477 ret.__accumNames.update( self.__accumNames )
478 ret.__name = self.__name
479 return ret
480
def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
    """Render the parse results as an XML string.

    doctag         - tag for the enclosing element; when None the results'
                     own name is used, falling back to "ITEM"
    namedItemsOnly - when true, tokens without a results name are left
                     out (and an unnamed, untagged result renders as "")
    indent         - prefix for this element's own lines
    formatted      - when false, newlines and indentation are omitted
    """
    # position in the token list -> results name recorded for that position
    namedItems = {}
    for tag, occurrences in self.__tokdict.items():
        for occurrence in occurrences:
            namedItems[occurrence[1]] = tag

    nl = "\n"
    nextLevelIndent = indent + " "
    if not formatted:
        # collapse all layout whitespace
        nl = indent = nextLevelIndent = ""

    # choose the enclosing tag: explicit doctag first, then our own name
    if doctag is not None:
        selfTag = doctag
    elif self.__name:
        selfTag = self.__name
    else:
        selfTag = None

    if not selfTag:
        if namedItemsOnly:
            return ""
        selfTag = "ITEM"

    out = [ nl, indent, "<", selfTag, ">" ]

    for i,res in enumerate(self.__toklist):
        if isinstance(res,ParseResults):
            # nested results render themselves, tagged with their name if any
            out.append( res.asXML(namedItems.get(i),
                                  namedItemsOnly and doctag is None,
                                  nextLevelIndent,
                                  formatted) )
            continue

        # plain token: emit one leaf element
        resTag = namedItems.get(i)
        if not resTag:
            if namedItemsOnly:
                continue
            resTag = "ITEM"
        out += [ nl, nextLevelIndent, "<", resTag, ">",
                 _xml_escape(_ustr(res)),
                 "</", resTag, ">" ]

    out += [ nl, indent, "</", selfTag, ">" ]
    return "".join(out)
540
542 for k,vlist in self.__tokdict.items():
543 for v,loc in vlist:
544 if sub is v:
545 return k
546 return None
547
549 """Returns the results name for this token expression."""
550 if self.__name:
551 return self.__name
552 elif self.__parent:
553 par = self.__parent()
554 if par:
555 return par.__lookup(self)
556 else:
557 return None
558 elif (len(self) == 1 and
559 len(self.__tokdict) == 1 and
560 self.__tokdict.values()[0][0][1] in (0,-1)):
561 return self.__tokdict.keys()[0]
562 else:
563 return None
564
def dump(self,indent='',depth=0):
    """Build a diagnostic listing of these results: the token list on the
    first line, then one "- name: value" line per named result, in sorted
    order. Named values that themselves carry names are dumped
    recursively one level deeper. indent is prepended to the lines so the
    text can be embedded in a larger display."""
    pieces = [ indent+_ustr(self.asList()) ]
    named = self.items()
    named.sort()
    for key,value in named:
        pieces.append('\n')
        pieces.append( "%s%s- %s: " % (indent,(' '*depth), key) )
        if isinstance(value,ParseResults) and value.keys():
            # nested results with their own names get a nested listing
            pieces.append( value.dump(indent,depth+1) )
        else:
            pieces.append(_ustr(value))
    return "".join(pieces)
585
586
588 return ( self.__toklist,
589 ( self.__tokdict.copy(),
590 self.__parent is not None and self.__parent() or None,
591 self.__accumNames,
592 self.__name ) )
593
595 self.__toklist = state[0]
596 self.__tokdict, \
597 par, \
598 inAccumNames, \
599 self.__name = state[1]
600 self.__accumNames = {}
601 self.__accumNames.update(inAccumNames)
602 if par is not None:
603 self.__parent = wkref(par)
604 else:
605 self.__parent = None
606
609
611 """Returns current column within a string, counting newlines as line separators.
612 The first column is number 1.
613
614 Note: the default parsing behavior is to expand tabs in the input string
615 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
616 on parsing strings containing <TAB>s, and suggested methods to maintain a
617 consistent view of the parsed string, the parse location, and line and column
618 positions within the parsed string.
619 """
620 return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc)
621
623 """Returns current line number within a string, counting newlines as line separators.
624 The first line is number 1.
625
626 Note: the default parsing behavior is to expand tabs in the input string
627 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
628 on parsing strings containing <TAB>s, and suggested methods to maintain a
629 consistent view of the parsed string, the parse location, and line and column
630 positions within the parsed string.
631 """
632 return strg.count("\n",0,loc) + 1
633
def line( loc, strg ):
    """Return the line of text within strg that contains position loc,
    treating newlines as line separators (the newline itself is not
    included).
    """
    # one past the newline that precedes loc (0 when there is none)
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # no newline at or after loc: the line runs to the end of the string
        return strg[start:]
    return strg[start:end]
643
645 print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
646
648 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
649
651 print ("Exception raised:" + _ustr(exc))
652
654 """'Do-nothing' debug action, to suppress debugging output during parsing."""
655 pass
656
658 """Abstract base level parser element class."""
659 DEFAULT_WHITE_CHARS = " \n\t\r"
660 verbose_stacktrace = False
661
666 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
667
669 self.parseAction = list()
670 self.failAction = None
671
672 self.strRepr = None
673 self.resultsName = None
674 self.saveAsList = savelist
675 self.skipWhitespace = True
676 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
677 self.copyDefaultWhiteChars = True
678 self.mayReturnEmpty = False
679 self.keepTabs = False
680 self.ignoreExprs = list()
681 self.debug = False
682 self.streamlined = False
683 self.mayIndexError = True
684 self.errmsg = ""
685 self.modalResults = True
686 self.debugActions = ( None, None, None )
687 self.re = None
688 self.callPreparse = True
689 self.callDuringTry = False
690
692 """Make a copy of this ParserElement. Useful for defining different parse actions
693 for the same parsing pattern, using copies of the original parse element."""
694 cpy = copy.copy( self )
695 cpy.parseAction = self.parseAction[:]
696 cpy.ignoreExprs = self.ignoreExprs[:]
697 if self.copyDefaultWhiteChars:
698 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
699 return cpy
700
702 """Define name for this expression, for use in debugging."""
703 self.name = name
704 self.errmsg = "Expected " + self.name
705 if hasattr(self,"exception"):
706 self.exception.msg = self.errmsg
707 return self
708
710 """Define name for referencing matching tokens as a nested attribute
711 of the returned parse results.
712 NOTE: this returns a *copy* of the original ParserElement object;
713 this is so that the client can define a basic element, such as an
714 integer, and reference it in multiple places with different names.
715 """
716 newself = self.copy()
717 newself.resultsName = name
718 newself.modalResults = not listAllMatches
719 return newself
720
722 """Method to invoke the Python pdb debugger when this element is
723 about to be parsed. Set breakFlag to True to enable, False to
724 disable.
725 """
726 if breakFlag:
727 _parseMethod = self._parse
728 def breaker(instring, loc, doActions=True, callPreParse=True):
729 import pdb
730 pdb.set_trace()
731 return _parseMethod( instring, loc, doActions, callPreParse )
732 breaker._originalParseMethod = _parseMethod
733 self._parse = breaker
734 else:
735 if hasattr(self._parse,"_originalParseMethod"):
736 self._parse = self._parse._originalParseMethod
737 return self
738
740 """Internal method used to decorate parse actions that take fewer than 3 arguments,
741 so that all parse actions can be called as f(s,l,t)."""
742 STAR_ARGS = 4
743
744 try:
745 restore = None
746 if isinstance(f,type):
747 restore = f
748 f = f.__init__
749 if not _PY3K:
750 codeObj = f.func_code
751 else:
752 codeObj = f.code
753 if codeObj.co_flags & STAR_ARGS:
754 return f
755 numargs = codeObj.co_argcount
756 if not _PY3K:
757 if hasattr(f,"im_self"):
758 numargs -= 1
759 else:
760 if hasattr(f,"__self__"):
761 numargs -= 1
762 if restore:
763 f = restore
764 except AttributeError:
765 try:
766 if not _PY3K:
767 call_im_func_code = f.__call__.im_func.func_code
768 else:
769 call_im_func_code = f.__code__
770
771
772
773 if call_im_func_code.co_flags & STAR_ARGS:
774 return f
775 numargs = call_im_func_code.co_argcount
776 if not _PY3K:
777 if hasattr(f.__call__,"im_self"):
778 numargs -= 1
779 else:
780 if hasattr(f.__call__,"__self__"):
781 numargs -= 0
782 except AttributeError:
783 if not _PY3K:
784 call_func_code = f.__call__.func_code
785 else:
786 call_func_code = f.__call__.__code__
787
788 if call_func_code.co_flags & STAR_ARGS:
789 return f
790 numargs = call_func_code.co_argcount
791 if not _PY3K:
792 if hasattr(f.__call__,"im_self"):
793 numargs -= 1
794 else:
795 if hasattr(f.__call__,"__self__"):
796 numargs -= 1
797
798
799
800 if numargs == 3:
801 return f
802 else:
803 if numargs > 3:
804 def tmp(s,l,t):
805 return f(f.__call__.__self__, s,l,t)
806 if numargs == 2:
807 def tmp(s,l,t):
808 return f(l,t)
809 elif numargs == 1:
810 def tmp(s,l,t):
811 return f(t)
812 else:
813 def tmp(s,l,t):
814 return f()
815 try:
816 tmp.__name__ = f.__name__
817 except (AttributeError,TypeError):
818
819 pass
820 try:
821 tmp.__doc__ = f.__doc__
822 except (AttributeError,TypeError):
823
824 pass
825 try:
826 tmp.__dict__.update(f.__dict__)
827 except (AttributeError,TypeError):
828
829 pass
830 return tmp
831 _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
832
834 """Define action to perform when successfully matching parse element definition.
835 Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
836 fn(loc,toks), fn(toks), or just fn(), where:
837 - s = the original string being parsed (see note below)
838 - loc = the location of the matching substring
839 - toks = a list of the matched tokens, packaged as a ParseResults object
840 If the functions in fns modify the tokens, they can return them as the return
841 value from fn, and the modified list of tokens will replace the original.
842 Otherwise, fn does not need to return any value.
843
844 Note: the default parsing behavior is to expand tabs in the input string
845 before starting the parsing process. See L{I{parseString}<parseString>} for more information
846 on parsing strings containing <TAB>s, and suggested methods to maintain a
847 consistent view of the parsed string, the parse location, and line and column
848 positions within the parsed string.
849 """
850 self.parseAction = list(map(self._normalizeParseActionArgs, list(fns)))
851 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
852 return self
853
855 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
856 self.parseAction += list(map(self._normalizeParseActionArgs, list(fns)))
857 self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
858 return self
859
861 """Define action to perform if parsing fails at this expression.
862 Fail action fn is a callable function that takes the arguments
863 fn(s,loc,expr,err) where:
864 - s = string being parsed
865 - loc = location where expression match was attempted and failed
866 - expr = the parse expression that failed
867 - err = the exception thrown
868 The function returns no value. It may throw ParseFatalException
869 if it is desired to stop parsing immediately."""
870 self.failAction = fn
871 return self
872
874 exprsFound = True
875 while exprsFound:
876 exprsFound = False
877 for e in self.ignoreExprs:
878 try:
879 while 1:
880 loc,dummy = e._parse( instring, loc )
881 exprsFound = True
882 except ParseException:
883 pass
884 return loc
885
887 if self.ignoreExprs:
888 loc = self._skipIgnorables( instring, loc )
889
890 if self.skipWhitespace:
891 wt = self.whiteChars
892 instrlen = len(instring)
893 while loc < instrlen and instring[loc] in wt:
894 loc += 1
895
896 return loc
897
898 - def parseImpl( self, instring, loc, doActions=True ):
900
901 - def postParse( self, instring, loc, tokenlist ):
903
904
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this element against instring starting at loc.

    Runs preParse (when both callPreParse and self.callPreparse are set),
    parseImpl and postParse, wraps the tokens in a ParseResults, and then
    applies the parse actions when doActions or self.callDuringTry is set.
    An IndexError from parseImpl is reported as a ParseException located
    at len(instring).

    Returns a (loc, retTokens) tuple.
    """
    debugging = ( self.debug )

    if debugging or self.failAction:
        # Slow path: debug actions and/or a fail action need to observe
        # the attempt, so the parse is wrapped in exception handling.
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        # note: this records the incoming loc, not the pre-parsed preloc
        tokensStart = loc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException:
            # notify the exception debug action and the fail action, then
            # re-raise the same exception
            err = None
            if self.debugActions[2]:
                err = sys.exc_info()[1]
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                if err is None:
                    err = sys.exc_info()[1]
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # Fast path: no debug or fail hooks to call.
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        # the IndexError guard is used only when this element may raise one,
        # or when loc is already at/after the end of the input
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a non-None return value replaces the current results
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException:
                # report exceptions raised inside parse actions, then re-raise
                if (self.debugActions[2] ):
                    err = sys.exc_info()[1]
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                # a non-None return value replaces the current results
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        # success debug action receives the start and end locations
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
981
987
988
989
990 - def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
1006
1007 _parse = _parseNoCache
1008
1009
1010 _exprArgCache = {}
1013 resetCache = staticmethod(resetCache)
1014
1015 _packratEnabled = False
1017 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1018 Repeated parse attempts at the same string location (which happens
1019 often in many complex grammars) can immediately return a cached value,
1020 instead of re-executing parsing/validating code. Memoizing is done of
1021 both valid results and parsing exceptions.
1022
1023 This speedup may break existing programs that use parse actions that
1024 have side-effects. For this reason, packrat parsing is disabled when
1025 you first import pyparsing. To activate the packrat feature, your
1026 program must call the class method ParserElement.enablePackrat(). If
1027 your program uses psyco to "compile as you go", you must call
1028 enablePackrat before calling psyco.full(). If you do not do this,
1029 Python will crash. For best results, call enablePackrat() immediately
1030 after importing pyparsing.
1031 """
1032 if not ParserElement._packratEnabled:
1033 ParserElement._packratEnabled = True
1034 ParserElement._parse = ParserElement._parseCache
1035 enablePackrat = staticmethod(enablePackrat)
1036
1038 """Execute the parse expression with the given string.
1039 This is the main interface to the client code, once the complete
1040 expression has been built.
1041
1042 If you want the grammar to require that the entire input string be
1043 successfully parsed, then set parseAll to True (equivalent to ending
1044 the grammar with StringEnd()).
1045
1046 Note: parseString implicitly calls expandtabs() on the input string,
1047 in order to report proper column numbers in parse actions.
1048 If the input string contains tabs and
1049 the grammar uses parse actions that use the loc argument to index into the
1050 string being parsed, you can ensure you have a consistent view of the input
1051 string by:
1052 - calling parseWithTabs on your grammar before calling parseString
1053 (see L{I{parseWithTabs}<parseWithTabs>})
1054 - define your parse action using the full (s,loc,toks) signature, and
1055 reference the input string using the parse action's s argument
1056 - explicitly expand the tabs in your input string before calling
1057 parseString
1058 """
1059 ParserElement.resetCache()
1060 if not self.streamlined:
1061 self.streamline()
1062
1063 for e in self.ignoreExprs:
1064 e.streamline()
1065 if not self.keepTabs:
1066 instring = instring.expandtabs()
1067 try:
1068 loc, tokens = self._parse( instring, 0 )
1069 if parseAll:
1070
1071 se = StringEnd()
1072 se._parse( instring, loc )
1073 except ParseBaseException:
1074 if ParserElement.verbose_stacktrace:
1075 raise
1076 else:
1077
1078 exc = sys.exc_info()[1]
1079 raise exc
1080 else:
1081 return tokens
1082
1084 """Scan the input string for expression matches. Each match will return the
1085 matching tokens, start location, and end location. May be called with optional
1086 maxMatches argument, to clip scanning after 'n' matches are found.
1087
1088 Note that the start and end locations are reported relative to the string
1089 being parsed. See L{I{parseString}<parseString>} for more information on parsing
1090 strings with embedded tabs."""
1091 if not self.streamlined:
1092 self.streamline()
1093 for e in self.ignoreExprs:
1094 e.streamline()
1095
1096 if not self.keepTabs:
1097 instring = _ustr(instring).expandtabs()
1098 instrlen = len(instring)
1099 loc = 0
1100 preparseFn = self.preParse
1101 parseFn = self._parse
1102 ParserElement.resetCache()
1103 matches = 0
1104 try:
1105 while loc <= instrlen and matches < maxMatches:
1106 try:
1107 preloc = preparseFn( instring, loc )
1108 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
1109 except ParseException:
1110 loc = preloc+1
1111 else:
1112 if nextLoc > loc:
1113 matches += 1
1114 yield tokens, preloc, nextLoc
1115 loc = nextLoc
1116 else:
1117 loc = preloc+1
1118 except ParseBaseException:
1119 if ParserElement.verbose_stacktrace:
1120 raise
1121 else:
1122
1123 exc = sys.exc_info()[1]
1124 raise exc
1125
1158
1160 """Another extension to scanString, simplifying the access to the tokens found
1161 to match the given parse expression. May be called with optional
1162 maxMatches argument, to clip searching after 'n' matches are found.
1163 """
1164 try:
1165 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1166 except ParseBaseException:
1167 if ParserElement.verbose_stacktrace:
1168 raise
1169 else:
1170
1171 exc = sys.exc_info()[1]
1172 raise exc
1173
1175 """Implementation of + operator - returns And"""
1176 if isinstance( other, basestring ):
1177 other = Literal( other )
1178 if not isinstance( other, ParserElement ):
1179 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1180 SyntaxWarning, stacklevel=2)
1181 return None
1182 return And( [ self, other ] )
1183
1185 """Implementation of + operator when left operand is not a ParserElement"""
1186 if isinstance( other, basestring ):
1187 other = Literal( other )
1188 if not isinstance( other, ParserElement ):
1189 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1190 SyntaxWarning, stacklevel=2)
1191 return None
1192 return other + self
1193
1195 """Implementation of - operator, returns And with error stop"""
1196 if isinstance( other, basestring ):
1197 other = Literal( other )
1198 if not isinstance( other, ParserElement ):
1199 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1200 SyntaxWarning, stacklevel=2)
1201 return None
1202 return And( [ self, And._ErrorStop(), other ] )
1203
1205 """Implementation of - operator when left operand is not a ParserElement"""
1206 if isinstance( other, basestring ):
1207 other = Literal( other )
1208 if not isinstance( other, ParserElement ):
1209 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1210 SyntaxWarning, stacklevel=2)
1211 return None
1212 return other - self
1213
1215 if isinstance(other,int):
1216 minElements, optElements = other,0
1217 elif isinstance(other,tuple):
1218 other = (other + (None, None))[:2]
1219 if other[0] is None:
1220 other = (0, other[1])
1221 if isinstance(other[0],int) and other[1] is None:
1222 if other[0] == 0:
1223 return ZeroOrMore(self)
1224 if other[0] == 1:
1225 return OneOrMore(self)
1226 else:
1227 return self*other[0] + ZeroOrMore(self)
1228 elif isinstance(other[0],int) and isinstance(other[1],int):
1229 minElements, optElements = other
1230 optElements -= minElements
1231 else:
1232 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1233 else:
1234 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1235
1236 if minElements < 0:
1237 raise ValueError("cannot multiply ParserElement by negative value")
1238 if optElements < 0:
1239 raise ValueError("second tuple value must be greater or equal to first tuple value")
1240 if minElements == optElements == 0:
1241 raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1242
1243 if (optElements):
1244 def makeOptionalList(n):
1245 if n>1:
1246 return Optional(self + makeOptionalList(n-1))
1247 else:
1248 return Optional(self)
1249 if minElements:
1250 if minElements == 1:
1251 ret = self + makeOptionalList(optElements)
1252 else:
1253 ret = And([self]*minElements) + makeOptionalList(optElements)
1254 else:
1255 ret = makeOptionalList(optElements)
1256 else:
1257 if minElements == 1:
1258 ret = self
1259 else:
1260 ret = And([self]*minElements)
1261 return ret
1262
1265
1267 """Implementation of | operator - returns MatchFirst"""
1268 if isinstance( other, basestring ):
1269 other = Literal( other )
1270 if not isinstance( other, ParserElement ):
1271 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1272 SyntaxWarning, stacklevel=2)
1273 return None
1274 return MatchFirst( [ self, other ] )
1275
1277 """Implementation of | operator when left operand is not a ParserElement"""
1278 if isinstance( other, basestring ):
1279 other = Literal( other )
1280 if not isinstance( other, ParserElement ):
1281 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1282 SyntaxWarning, stacklevel=2)
1283 return None
1284 return other | self
1285
1287 """Implementation of ^ operator - returns Or"""
1288 if isinstance( other, basestring ):
1289 other = Literal( other )
1290 if not isinstance( other, ParserElement ):
1291 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1292 SyntaxWarning, stacklevel=2)
1293 return None
1294 return Or( [ self, other ] )
1295
1297 """Implementation of ^ operator when left operand is not a ParserElement"""
1298 if isinstance( other, basestring ):
1299 other = Literal( other )
1300 if not isinstance( other, ParserElement ):
1301 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1302 SyntaxWarning, stacklevel=2)
1303 return None
1304 return other ^ self
1305
1307 """Implementation of & operator - returns Each"""
1308 if isinstance( other, basestring ):
1309 other = Literal( other )
1310 if not isinstance( other, ParserElement ):
1311 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1312 SyntaxWarning, stacklevel=2)
1313 return None
1314 return Each( [ self, other ] )
1315
1317 """Implementation of & operator when left operand is not a ParserElement"""
1318 if isinstance( other, basestring ):
1319 other = Literal( other )
1320 if not isinstance( other, ParserElement ):
1321 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1322 SyntaxWarning, stacklevel=2)
1323 return None
1324 return other & self
1325
1327 """Implementation of ~ operator - returns NotAny"""
1328 return NotAny( self )
1329
1331 """Shortcut for setResultsName, with listAllMatches=default::
1332 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1333 could be written as::
1334 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1335 """
1336 return self.setResultsName(name)
1337
1339 """Suppresses the output of this ParserElement; useful to keep punctuation from
1340 cluttering up returned output.
1341 """
1342 return Suppress( self )
1343
1345 """Disables the skipping of whitespace before matching the characters in the
1346 ParserElement's defined pattern. This is normally only used internally by
1347 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1348 """
1349 self.skipWhitespace = False
1350 return self
1351
1353 """Overrides the default whitespace chars
1354 """
1355 self.skipWhitespace = True
1356 self.whiteChars = chars
1357 self.copyDefaultWhiteChars = False
1358 return self
1359
1361 """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
1362 Must be called before parseString when the input grammar contains elements that
1363 match <TAB> characters."""
1364 self.keepTabs = True
1365 return self
1366
1368 """Define expression to be ignored (e.g., comments) while doing pattern
1369 matching; may be called repeatedly, to define multiple comment or other
1370 ignorable patterns.
1371 """
1372 if isinstance( other, Suppress ):
1373 if other not in self.ignoreExprs:
1374 self.ignoreExprs.append( other.copy() )
1375 else:
1376 self.ignoreExprs.append( Suppress( other.copy() ) )
1377 return self
1378
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Install the three debugging callbacks and switch debugging on.

    Any action passed as a false value (e.g. None) is replaced by the
    corresponding module-level default debug action.

    Returns self, so calls can be chained.
    """
    if not startAction:
        startAction = _defaultStartDebugAction
    if not successAction:
        successAction = _defaultSuccessDebugAction
    if not exceptionAction:
        exceptionAction = _defaultExceptionDebugAction

    self.debugActions = ( startAction, successAction, exceptionAction )
    self.debug = True
    return self
1386
1388 """Enable display of debugging messages while doing pattern matching.
1389 Set flag to True to enable, False to disable."""
1390 if flag:
1391 self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
1392 else:
1393 self.debug = False
1394 return self
1395
1398
1401
1403 self.streamlined = True
1404 self.strRepr = None
1405 return self
1406
1409
def validate( self, validateTrace=None ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    validateTrace is accepted so that the signature matches overriding
    implementations that thread a trace list through nested expressions;
    it is not used here.  The default is None rather than a shared mutable
    list object.
    """
    # the recursion check always starts from a fresh, empty trace
    self.checkRecursion( [] )
1413
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    Returns whatever parseString returns for the file contents.  A
    ParseBaseException raised by parseString is caught and re-raised
    from here.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no read() method: treat the argument as a file name
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # release the handle even if the read itself fails
            f.close()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException:
        exc = sys.exc_info()[1]
        raise exc
1431
1434
1436 if aname == "myException":
1437 self.myException = ret = self.getException();
1438 return ret;
1439 else:
1440 raise AttributeError("no such attribute " + aname)
1441
1443 if isinstance(other, ParserElement):
1444 return self is other or self.__dict__ == other.__dict__
1445 elif isinstance(other, basestring):
1446 try:
1447 self.parseString(_ustr(other), parseAll=True)
1448 return True
1449 except ParseBaseException:
1450 return False
1451 else:
1452 return super(ParserElement,self)==other
1453
1455 return not (self == other)
1456
1458 return hash(id(self))
1459
1461 return self == other
1462
1464 return not (self == other)
1465
1466
1467 -class Token(ParserElement):
1468 """Abstract ParserElement subclass, for defining atomic matching patterns."""
1471
1472
1474 s = super(Token,self).setName(name)
1475 self.errmsg = "Expected " + self.name
1476
1477 return s
1478
1479
1481 """An empty token, will always match."""
1483 super(Empty,self).__init__()
1484 self.name = "Empty"
1485 self.mayReturnEmpty = True
1486 self.mayIndexError = False
1487
1488
1490 """A token that will never match."""
1492 super(NoMatch,self).__init__()
1493 self.name = "NoMatch"
1494 self.mayReturnEmpty = True
1495 self.mayIndexError = False
1496 self.errmsg = "Unmatchable token"
1497
1498
def parseImpl( self, instring, loc, doActions=True ):
    """Never match: always raise this element's exception.

    The exception object held in self.myException is stamped with the
    current location and input string before being raised.  doActions is
    not used here.
    """
    failure = self.myException
    failure.loc, failure.pstr = loc, instring
    raise failure
1504
1505
1507 """Token to exactly match a specified string."""
1509 super(Literal,self).__init__()
1510 self.match = matchString
1511 self.matchLen = len(matchString)
1512 try:
1513 self.firstMatchChar = matchString[0]
1514 except IndexError:
1515 warnings.warn("null string passed to Literal; use Empty() instead",
1516 SyntaxWarning, stacklevel=2)
1517 self.__class__ = Empty
1518 self.name = '"%s"' % _ustr(self.match)
1519 self.errmsg = "Expected " + self.name
1520 self.mayReturnEmpty = False
1521
1522 self.mayIndexError = False
1523
1524
1525
1526
1527
def parseImpl( self, instring, loc, doActions=True ):
    """Match the literal string self.match at position loc.

    Returns a tuple (location just past the match, matched string).
    Raises self.myException, stamped with loc and instring, when the
    text at loc is not the literal.  doActions is not used here.
    """
    # cheap first-character test before the full comparison
    found = instring[loc] == self.firstMatchChar
    if found and self.matchLen != 1:
        found = instring.startswith(self.match, loc)
    if found:
        return loc+self.matchLen, self.match

    failure = self.myException
    failure.loc, failure.pstr = loc, instring
    raise failure
1537 _L = Literal
1538
1540 """Token to exactly match a specified string as a keyword, that is, it must be
1541 immediately followed by a non-keyword character. Compare with Literal::
1542 Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
1543 Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)'
1544 Accepts two optional constructor arguments in addition to the keyword string:
1545 identChars is a string of characters that would be valid identifier characters,
1546 defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
1547 matching, default is False.
1548 """
1549 DEFAULT_KEYWORD_CHARS = alphanums+"_$"
1550
1552 super(Keyword,self).__init__()
1553 self.match = matchString
1554 self.matchLen = len(matchString)
1555 try:
1556 self.firstMatchChar = matchString[0]
1557 except IndexError:
1558 warnings.warn("null string passed to Keyword; use Empty() instead",
1559 SyntaxWarning, stacklevel=2)
1560 self.name = '"%s"' % self.match
1561 self.errmsg = "Expected " + self.name
1562 self.mayReturnEmpty = False
1563
1564 self.mayIndexError = False
1565 self.caseless = caseless
1566 if caseless:
1567 self.caselessmatch = matchString.upper()
1568 identChars = identChars.upper()
1569 self.identChars = set(identChars)
1570
def parseImpl( self, instring, loc, doActions=True ):
    """Match the keyword at loc, requiring non-identifier characters around it.

    The keyword text must be present at loc, must not be immediately
    followed by a character from self.identChars, and must not be
    immediately preceded by one.  When self.caseless is set, the text and
    its neighbouring characters are compared in upper case.

    Returns (location just past the keyword, self.match); otherwise raises
    self.myException stamped with loc and instring.  doActions is not used.
    """
    identChars = self.identChars
    end = loc + self.matchLen
    # true when nothing follows the candidate keyword in the input
    atInputEnd = end >= len(instring)

    if self.caseless:
        if instring[ loc:end ].upper() == self.caselessmatch:
            if atInputEnd or instring[end].upper() not in identChars:
                if loc == 0 or instring[loc-1].upper() not in identChars:
                    return end, self.match
    elif instring[loc] == self.firstMatchChar:
        if self.matchLen == 1 or instring.startswith(self.match, loc):
            if atInputEnd or instring[end] not in identChars:
                if loc == 0 or instring[loc-1] not in identChars:
                    return end, self.match

    failure = self.myException
    failure.loc, failure.pstr = loc, instring
    raise failure
1588
1593
1598 setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1599
1601 """Token to match a specified string, ignoring case of letters.
1602 Note: the matched results will always be in the case of the given
1603 match string, NOT the case of the input text.
1604 """
1606 super(CaselessLiteral,self).__init__( matchString.upper() )
1607
1608 self.returnString = matchString
1609 self.name = "'%s'" % self.returnString
1610 self.errmsg = "Expected " + self.name
1611
1612
def parseImpl( self, instring, loc, doActions=True ):
    """Match the literal at loc without regard to letter case.

    The input slice is upper-cased and compared with self.match.  On
    success returns (location just past the match, self.returnString), so
    the result uses the stored return string rather than the input text.
    Otherwise raises self.myException stamped with loc and instring.
    doActions is not used here.
    """
    end = loc + self.matchLen
    candidate = instring[ loc:end ]
    if candidate.upper() != self.match:
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure
    return end, self.returnString
1621
1625
def parseImpl( self, instring, loc, doActions=True ):
    """Match the keyword at loc, ignoring case.

    The upper-cased input slice must equal self.caselessmatch, and the
    character right after it (if any) must not be in self.identChars once
    upper-cased.  Returns (location just past the keyword, self.match);
    otherwise raises self.myException stamped with loc and instring.
    doActions is not used here.
    """
    end = loc + self.matchLen
    if instring[ loc:end ].upper() == self.caselessmatch:
        # accept at end of input, or when a non-identifier character follows
        if end >= len(instring) or instring[end].upper() not in self.identChars:
            return end, self.match

    failure = self.myException
    failure.loc, failure.pstr = loc, instring
    raise failure
1635
1637 """Token for matching words composed of allowed character sets.
1638 Defined with string containing all allowed initial characters,
1639 an optional string containing allowed body characters (if omitted,
1640 defaults to the initial character set), and an optional minimum,
1641 maximum, and/or exact length. The default value for min is 1 (a
1642 minimum value < 1 is not valid); the default values for max and exact
1643 are 0, meaning no maximum or exact length restriction.
1644 """
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
    """Define a word token.

    initChars  - characters allowed as the first character of the word
    bodyChars  - characters allowed after the first one; when omitted (or
                 empty) the initChars set is used
    min        - minimum word length; values < 1 raise ValueError
    max        - maximum word length; 0 means no maximum
    exact      - exact word length; when > 0 it overrides min and max
    asKeyword  - when true, the word must not be adjacent to further body
                 characters

    When neither character set contains a space and the length arguments
    are all left at their defaults, an equivalent regular expression is
    compiled and stored in self.re.
    """
    super(Word,self).__init__()
    self.initCharsOrig = initChars
    self.initChars = set(initChars)
    if bodyChars :
        self.bodyCharsOrig = bodyChars
        self.bodyChars = set(bodyChars)
    else:
        self.bodyCharsOrig = initChars
        self.bodyChars = set(initChars)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    self.minLen = min

    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    if exact > 0:
        self.maxLen = exact
        self.minLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name

    self.mayIndexError = False
    self.asKeyword = asKeyword

    if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
        if self.bodyCharsOrig == self.initCharsOrig:
            self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
        elif len(self.initCharsOrig) == 1:
            # Only a single initial character may be emitted as literal text.
            # Keying this branch on the body set would turn several initial
            # characters into a literal sequence instead of alternatives.
            self.reString = "%s[%s]*" % \
                                  (re.escape(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        else:
            self.reString = "[%s][%s]*" % \
                                  (_escapeRegexRangeChars(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        if self.asKeyword:
            self.reString = r"\b"+self.reString+r"\b"
        try:
            self.re = re.compile( self.reString )
        except Exception:
            # best effort: with no compiled pattern, parseImpl takes the
            # character-by-character path
            self.re = None
1695
def parseImpl( self, instring, loc, doActions=True ):
    """Match a word at loc.

    When a compiled pattern is available in self.re it is used directly.
    Otherwise the word is scanned by hand: one character from
    self.initChars followed by characters from self.bodyChars, up to
    self.maxLen characters in total.

    Returns (location just past the word, the word text).  Raises
    self.myException, stamped with a location and instring, when no
    acceptable word is found.  doActions is not used here.
    """
    if self.re:
        found = self.re.match(instring,loc)
        if found:
            return found.end(), found.group()
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure

    if instring[ loc ] not in self.initChars:
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure

    bodychars = self.bodyChars
    instrlen = len(instring)
    start = loc
    stop = min( start + self.maxLen, instrlen )
    loc = start + 1
    while loc < stop and instring[loc] in bodychars:
        loc += 1

    # true when the character right after the scanned word is a body character
    moreBodyFollows = loc < instrlen and instring[loc] in bodychars
    rejected = (
        loc - start < self.minLen
        or (self.maxSpecified and moreBodyFollows)
        or (self.asKeyword and
            (moreBodyFollows or (start>0 and instring[start-1] in bodychars)))
    )
    if rejected:
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure

    return loc, instring[start:loc]
1740
1742 try:
1743 return super(Word,self).__str__()
1744 except:
1745 pass
1746
1747
1748 if self.strRepr is None:
1749
1750 def charsAsStr(s):
1751 if len(s)>4:
1752 return s[:4]+"..."
1753 else:
1754 return s
1755
1756 if ( self.initCharsOrig != self.bodyCharsOrig ):
1757 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
1758 else:
1759 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
1760
1761 return self.strRepr
1762
1763
1765 """Token for matching strings that match a given regular expression.
1766 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
1767 """
1768 compiledREtype = type(re.compile("[A-Z]"))
def __init__( self, pattern, flags=0):
    """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.

    pattern may also be an already compiled regular expression object, in
    which case it is used directly and flags is only recorded.

    Raises ValueError when pattern is neither a string nor a compiled
    pattern; an invalid pattern string is reported with a warning and the
    compile error is re-raised.
    """
    super(Regex,self).__init__()

    if isinstance(pattern, basestring):
        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

    elif isinstance(pattern, Regex.compiledREtype):
        self.re = pattern
        # Store the pattern's source text.  str() of a compiled pattern
        # yields the object's representation, not a regular expression.
        self.pattern = \
        self.reString = pattern.pattern
        self.flags = flags

    else:
        raise ValueError("Regex may only be constructed with a string or a compiled RE object")

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name

    self.mayIndexError = False
    # the expression may return empty exactly when it matches the empty string
    self.mayReturnEmpty = self.re.match("") is not None
1803
def parseImpl( self, instring, loc, doActions=True ):
    """Match the compiled regular expression at loc.

    Returns (end of the match, ParseResults holding the matched text); each
    named group of the match is also stored in the results under its group
    name.  Raises self.myException, stamped with loc and instring, when the
    pattern does not match at loc.  doActions is not used here.
    """
    found = self.re.match(instring,loc)
    if not found:
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure

    tokens = ParseResults(found.group())
    for groupName, groupText in found.groupdict().items():
        tokens[groupName] = groupText
    return found.end(), tokens
1819
1821 try:
1822 return super(Regex,self).__str__()
1823 except:
1824 pass
1825
1826 if self.strRepr is None:
1827 self.strRepr = "Re:(%s)" % repr(self.pattern)
1828
1829 return self.strRepr
1830
1831
1833 """Token for matching strings that are delimited by quoting characters.
1834 """
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
    """
       Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=None)
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
        - multiline - boolean indicating whether quotes can span multiple lines (default=False)
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

       A SyntaxWarning is issued and SyntaxError raised when quoteChar or
       endQuoteChar is empty after stripping surrounding whitespace.
    """
    super(QuotedString,self).__init__()

    # surrounding whitespace is not treated as part of a delimiter
    quoteChar = quoteChar.strip()
    if len(quoteChar) == 0:
        warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is not None:
        endQuoteChar = endQuoteChar.strip()
        if len(endQuoteChar) == 0:
            warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()
    else:
        endQuoteChar = quoteChar

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults

    # body characters: anything except the first closing-quote character,
    # the escape character and (unless multiline) line breaks
    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        bodyTemplate = r'%s(?:[^%s%s]'
    else:
        self.flags = 0
        bodyTemplate = r'%s(?:[^%s\n\r%s]'
    if escChar is not None:
        escExcluded = _escapeRegexRangeChars(escChar) or ''
    else:
        escExcluded = ''
    self.pattern = bodyTemplate % \
        ( re.escape(self.quoteChar),
          _escapeRegexRangeChars(self.endQuoteChar[0]),
          escExcluded )

    if len(self.endQuoteChar) > 1:
        # a proper prefix of a multi-character closing quote may appear in
        # the body as long as the character after it breaks the sequence
        partialEndings = []
        for i in range(len(self.endQuoteChar)-1,0,-1):
            partialEndings.append("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i])))
        self.pattern += '|(?:' + ')|(?:'.join(partialEndings) + ')'
    if escQuote:
        self.pattern += (r'|(?:%s)' % re.escape(escQuote))
    if escChar:
        self.pattern += (r'|(?:%s.)' % re.escape(escChar))
        self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
    self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

    try:
        self.re = re.compile(self.pattern, self.flags)
        self.reString = self.pattern
    except sre_constants.error:
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
            SyntaxWarning, stacklevel=2)
        raise

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name

    self.mayIndexError = False
    self.mayReturnEmpty = True
1908
def parseImpl( self, instring, loc, doActions=True ):
    """Match a quoted string starting at loc.

    Returns (end of the match, matched text).  When self.unquoteResults is
    set, the quote delimiters are stripped, escape characters are removed
    and escaped-quote sequences are replaced by the end quote string.
    Raises self.myException, stamped with loc and instring, when no quoted
    string starts at loc.  doActions is not used here.
    """
    # only run the pattern when the first quote character is present
    result = None
    if instring[loc] == self.firstQuoteChar:
        result = self.re.match(instring,loc)
    if not result:
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    loc = result.end()
    ret = result.group()

    if self.unquoteResults:

        # strip off quotes
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(ret,basestring):
            # replace escaped characters; the replacement is a raw string so
            # that the group reference is not an invalid string escape
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

            # replace escaped quotes
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
1935
1937 try:
1938 return super(QuotedString,self).__str__()
1939 except:
1940 pass
1941
1942 if self.strRepr is None:
1943 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
1944
1945 return self.strRepr
1946
1947
1949 """Token for matching words composed of characters *not* in a given set.
1950 Defined with string containing all disallowed characters, and an optional
1951 minimum, maximum, and/or exact length. The default value for min is 1 (a
1952 minimum value < 1 is not valid); the default values for max and exact
1953 are 0, meaning no maximum or exact length restriction.
1954 """
def __init__( self, notChars, min=1, max=0, exact=0 ):
    """Define a token matching runs of characters that are not in notChars.

    min   - minimum length; values < 1 raise ValueError
    max   - maximum length; 0 (or less) means no maximum
    exact - exact length; when > 0 it overrides both min and max

    Whitespace skipping is switched off for this token.
    """
    super(CharsNotIn,self).__init__()
    self.skipWhitespace = False
    self.notChars = notChars

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

    # resolve the effective length bounds; an exact length wins over min/max
    if exact > 0:
        shortest = longest = exact
    elif max > 0:
        shortest, longest = min, max
    else:
        shortest, longest = min, _MAX_INT
    self.minLen = shortest
    self.maxLen = longest

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = ( self.minLen == 0 )

    self.mayIndexError = False
1979
def parseImpl( self, instring, loc, doActions=True ):
    """Match a run of characters none of which is in self.notChars.

    Scanning starts at loc and stops at the first excluded character, after
    self.maxLen characters, or at the end of the input.  Returns (location
    just past the run, the run text).  Raises self.myException when the
    character at loc is excluded (stamped with loc) or when the run is
    shorter than self.minLen (stamped with the location where scanning
    stopped).  doActions is not used here.
    """
    excluded = self.notChars
    if instring[loc] in excluded:
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure

    start = loc
    limit = min( start+self.maxLen, len(instring) )
    loc = start + 1
    while loc < limit and instring[loc] not in excluded:
        loc += 1

    if loc - start < self.minLen:
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure

    return loc, instring[start:loc]
2004
2006 try:
2007 return super(CharsNotIn, self).__str__()
2008 except:
2009 pass
2010
2011 if self.strRepr is None:
2012 if len(self.notChars) > 4:
2013 self.strRepr = "!W:(%s...)" % self.notChars[:4]
2014 else:
2015 self.strRepr = "!W:(%s)" % self.notChars
2016
2017 return self.strRepr
2018
2020 """Special matching class for matching whitespace. Normally, whitespace is ignored
2021 by pyparsing grammars. This class is included when some whitespace structures
2022 are significant. Define with a string containing the whitespace characters to be
2023 matched; default is " \\t\\r\\n". Also takes optional min, max, and exact arguments,
2024 as defined for the Word class."""
2025 whiteStrs = {
2026 " " : "<SPC>",
2027 "\t": "<TAB>",
2028 "\n": "<LF>",
2029 "\r": "<CR>",
2030 "\f": "<FF>",
2031 }
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """Define a token that matches the whitespace characters in ws.

    min   - minimum run length
    max   - maximum run length; 0 (or less) means no maximum
    exact - exact run length; when > 0 it overrides both min and max

    The characters being matched are removed from the set of whitespace
    characters this element skips.
    """
    super(White,self).__init__()
    self.matchWhite = ws
    stillSkipped = [c for c in self.whiteChars if c not in self.matchWhite]
    self.setWhitespaceChars( "".join(stillSkipped) )
    #~ self.leaveWhitespace()
    # the name is built from the display labels in White.whiteStrs
    labels = []
    for c in self.matchWhite:
        labels.append(White.whiteStrs[c])
    self.name = "".join(labels)
    self.mayReturnEmpty = True
    self.errmsg = "Expected " + self.name

    # resolve the effective length bounds; an exact length wins over min/max
    if exact > 0:
        shortest = longest = exact
    elif max > 0:
        shortest, longest = min, max
    else:
        shortest, longest = min, _MAX_INT
    self.minLen = shortest
    self.maxLen = longest
2052
def parseImpl( self, instring, loc, doActions=True ):
    """Match a run of the whitespace characters in self.matchWhite.

    Scanning starts at loc and stops at the first other character, after
    self.maxLen characters, or at the end of the input.  Returns (location
    just past the run, the run text).  Raises self.myException when the
    character at loc is not one of the matched characters (stamped with
    loc) or when the run is shorter than self.minLen (stamped with the
    location where scanning stopped).  doActions is not used here.
    """
    if instring[ loc ] not in self.matchWhite:
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure

    start = loc
    limit = min( start + self.maxLen, len(instring) )
    loc = start + 1
    while loc < limit and instring[loc] in self.matchWhite:
        loc += 1

    if loc - start < self.minLen:
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure

    return loc, instring[start:loc]
2075
2076
2079 super(_PositionToken,self).__init__()
2080 self.name=self.__class__.__name__
2081 self.mayReturnEmpty = True
2082 self.mayIndexError = False
2083
2085 """Token to advance to a specific column of input text; useful for tabular report scraping."""
2089
2091 if col(loc,instring) != self.col:
2092 instrlen = len(instring)
2093 if self.ignoreExprs:
2094 loc = self._skipIgnorables( instring, loc )
2095 while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
2096 loc += 1
2097 return loc
2098
def parseImpl( self, instring, loc, doActions=True ):
    """Advance from loc to the column stored in self.col.

    Returns (new location, the text skipped over to get there).  Raises
    ParseException when the current column is already past the target
    column.  doActions is not used here.
    """
    currentCol = col( loc, instring )
    if currentCol > self.col:
        raise ParseException( instring, loc, "Text not in expected column", self )
    # move forward by the number of columns still missing
    target = loc + self.col - currentCol
    return target, instring[ loc: target ]
2106
2108 """Matches if current position is at the beginning of a line within the parse string"""
2113
2114
2116 preloc = super(LineStart,self).preParse(instring,loc)
2117 if instring[preloc] == "\n":
2118 loc += 1
2119 return loc
2120
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed only at the beginning of a line.

    The position qualifies when it is 0, when it is where preParse lands
    from position 0, or when the preceding character is a newline.
    Returns (loc, []) without consuming input; otherwise raises
    self.myException stamped with loc and instring.  doActions is not used.
    """
    if ( loc != 0 and
         loc != self.preParse( instring, 0 ) and
         instring[loc-1] != "\n" ):
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure
    return loc, []
2131
2133 """Matches if current position is at the end of a line within the parse string"""
2138
2139
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed at a newline character or at the very end of the input.

    Returns (loc+1, "\\n") when a newline is at loc, and (loc+1, []) when
    loc is exactly the end of the input.  In every other case
    self.myException is raised, stamped with loc and instring.  doActions
    is not used here.
    """
    inputLen = len(instring)
    if loc == inputLen:
        return loc+1, []
    if loc < inputLen and instring[loc] == "\n":
        return loc+1, "\n"

    failure = self.myException
    failure.loc, failure.pstr = loc, instring
    raise failure
2157
2159 """Matches if current position is at the beginning of the parse string"""
2163
2164
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed only at the beginning of the parse string.

    Position 0 always qualifies; so does the position preParse reaches
    from 0.  Returns (loc, []) without consuming input; otherwise raises
    self.myException stamped with loc and instring.  doActions is not used.
    """
    # preParse is only consulted when loc is not already 0
    if loc != 0 and loc != self.preParse( instring, 0 ):
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure
    return loc, []
2175
2177 """Matches if current position is at the end of the parse string"""
2181
2182
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed only at (or beyond) the end of the parse string.

    Returns (loc+1, []) when loc is exactly the end of the input and
    (loc, []) when loc is already past it.  Raises self.myException,
    stamped with loc and instring, when input remains at loc.  doActions
    is not used here.
    """
    inputLen = len(instring)
    if loc < inputLen:
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
    if loc == inputLen:
        return loc+1, []
    # loc > inputLen is the only remaining case, so no further branch is needed
    return loc, []
2199
2201 """Matches if the current position is at the beginning of a Word, and
2202 is not preceded by any character in a given set of wordChars
2203 (default=printables). To emulate the \b behavior of regular expressions,
2204 use WordStart(alphanums). WordStart will also match at the beginning of
2205 the string being parsed, or at the beginning of a line.
2206 """
2208 super(WordStart,self).__init__()
2209 self.wordChars = set(wordChars)
2210 self.errmsg = "Not at the start of a word"
2211
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed only at the start of a word.

    Position 0 always qualifies.  Elsewhere the preceding character must
    not be in self.wordChars and the character at loc must be.  Returns
    (loc, []) without consuming input; otherwise raises self.myException
    stamped with loc and instring.  doActions is not used here.
    """
    if loc != 0:
        precededByWordChar = instring[loc-1] in self.wordChars
        if precededByWordChar or instring[loc] not in self.wordChars:
            failure = self.myException
            failure.loc, failure.pstr = loc, instring
            raise failure
    return loc, []
2221
2223 """Matches if the current position is at the end of a Word, and
2224 is not followed by any character in a given set of wordChars
2225 (default=printables). To emulate the \b behavior of regular expressions,
2226 use WordEnd(alphanums). WordEnd will also match at the end of
2227 the string being parsed, or at the end of a line.
2228 """
2230 super(WordEnd,self).__init__()
2231 self.wordChars = set(wordChars)
2232 self.skipWhitespace = False
2233 self.errmsg = "Not at the end of a word"
2234
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed only at the end of a word.

    A position at or past the end of the input (or any position in an
    empty input) always qualifies.  Elsewhere the character at loc must
    not be in self.wordChars and the preceding character must be.  Returns
    (loc, []) without consuming input; otherwise raises self.myException
    stamped with loc and instring.  doActions is not used here.
    """
    inputLen = len(instring)
    if not (inputLen > 0 and loc < inputLen):
        return loc, []

    followedByWordChar = instring[loc] in self.wordChars
    if followedByWordChar or instring[loc-1] not in self.wordChars:
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure
    return loc, []
2246
2247
2249 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
def __init__( self, exprs, savelist = False ):
    """Store the contained expressions in self.exprs.

    exprs may be a list (kept as the same list object, not copied), a
    string (wrapped in a single Literal), any other iterable (converted to
    a list), or a single non-iterable object (wrapped in a one-element
    list).  savelist is passed on to the base class constructor.
    """
    super(ParseExpression,self).__init__(savelist)
    if isinstance( exprs, list ):
        members = exprs
    elif isinstance( exprs, basestring ):
        members = [ Literal( exprs ) ]
    else:
        try:
            members = list( exprs )
        except TypeError:
            # not iterable: treat it as a single expression
            members = [ exprs ]
    self.exprs = members
    self.callPreparse = False
2262
2264 return self.exprs[i]
2265
2267 self.exprs.append( other )
2268 self.strRepr = None
2269 return self
2270
2272 """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
2273 all contained expressions."""
2274 self.skipWhitespace = False
2275 self.exprs = [ e.copy() for e in self.exprs ]
2276 for e in self.exprs:
2277 e.leaveWhitespace()
2278 return self
2279
2281 if isinstance( other, Suppress ):
2282 if other not in self.ignoreExprs:
2283 super( ParseExpression, self).ignore( other )
2284 for e in self.exprs:
2285 e.ignore( self.ignoreExprs[-1] )
2286 else:
2287 super( ParseExpression, self).ignore( other )
2288 for e in self.exprs:
2289 e.ignore( self.ignoreExprs[-1] )
2290 return self
2291
2293 try:
2294 return super(ParseExpression,self).__str__()
2295 except:
2296 pass
2297
2298 if self.strRepr is None:
2299 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
2300 return self.strRepr
2301
2303 super(ParseExpression,self).streamline()
2304
2305 for e in self.exprs:
2306 e.streamline()
2307
2308
2309
2310
2311 if ( len(self.exprs) == 2 ):
2312 other = self.exprs[0]
2313 if ( isinstance( other, self.__class__ ) and
2314 not(other.parseAction) and
2315 other.resultsName is None and
2316 not other.debug ):
2317 self.exprs = other.exprs[:] + [ self.exprs[1] ]
2318 self.strRepr = None
2319 self.mayReturnEmpty |= other.mayReturnEmpty
2320 self.mayIndexError |= other.mayIndexError
2321
2322 other = self.exprs[-1]
2323 if ( isinstance( other, self.__class__ ) and
2324 not(other.parseAction) and
2325 other.resultsName is None and
2326 not other.debug ):
2327 self.exprs = self.exprs[:-1] + other.exprs[:]
2328 self.strRepr = None
2329 self.mayReturnEmpty |= other.mayReturnEmpty
2330 self.mayIndexError |= other.mayIndexError
2331
2332 return self
2333
2337
def validate(self, validateTrace=None):
    """Validate every contained expression, then check this one for recursion.

    validateTrace is the list of expressions already on the validation
    path; it is copied, never modified.  It defaults to an empty trace
    (None is used as the default instead of a shared mutable list).
    """
    if validateTrace is None:
        validateTrace = []
    tmp = validateTrace[:] + [self]
    for e in self.exprs:
        e.validate(tmp)
    self.checkRecursion([])
2343
2344 -class And(ParseExpression):
2345 """Requires all given ParseExpressions to be found in the given order.
2346 Expressions may be separated by whitespace.
2347 May be constructed using the '+' operator.
2348 """
2349
2354
def __init__(self, exprs, savelist=True):
    """Build a sequence of expressions that must all match, in order.

    mayReturnEmpty is True only if every contained expression may return
    empty.  Whitespace handling is inherited from the first contained
    expression.
    """
    super(And, self).__init__(exprs, savelist)
    self.mayReturnEmpty = True
    for e in self.exprs:
        if not e.mayReturnEmpty:
            self.mayReturnEmpty = False
            break
    # Use the normalized self.exprs, not the raw argument: exprs may be a
    # string (exprs[0] would be a plain character) or a non-indexable
    # iterable, both of which the base constructor has already converted.
    first = self.exprs[0]
    self.setWhitespaceChars(first.whiteChars)
    self.skipWhitespace = first.skipWhitespace
    self.callPreparse = True
2365
def parseImpl(self, instring, loc, doActions=True):
    """Match each contained expression in turn and accumulate the tokens.

    After an And._ErrorStop marker has been seen, a failure in any later
    expression is re-raised as a ParseSyntaxException.
    """
    # the first expression is parsed with callPreParse=False
    head = self.exprs[0]
    loc, resultlist = head._parse(instring, loc, doActions, callPreParse=False)
    pastErrorStop = False
    for e in self.exprs[1:]:
        if isinstance(e, And._ErrorStop):
            pastErrorStop = True
            continue
        if not pastErrorStop:
            loc, exprtokens = e._parse(instring, loc, doActions)
        else:
            try:
                loc, exprtokens = e._parse(instring, loc, doActions)
            except ParseSyntaxException:
                raise
            except ParseBaseException:
                raise ParseSyntaxException(sys.exc_info()[1])
            except IndexError:
                raise ParseSyntaxException(
                    ParseException(instring, len(instring), self.errmsg, self))
        # keep results that carry tokens or named values
        if exprtokens or exprtokens.keys():
            resultlist += exprtokens
    return loc, resultlist
2390
2392 if isinstance( other, basestring ):
2393 other = Literal( other )
2394 return self.append( other )
2395
2397 subRecCheckList = parseElementList[:] + [ self ]
2398 for e in self.exprs:
2399 e.checkRecursion( subRecCheckList )
2400 if not e.mayReturnEmpty:
2401 break
2402
2404 if hasattr(self,"name"):
2405 return self.name
2406
2407 if self.strRepr is None:
2408 self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2409
2410 return self.strRepr
2411
2412
2413 -class Or(ParseExpression):
2414 """Requires that at least one ParseExpression is found.
2415 If two expressions match, the expression that matches the longest string will be used.
2416 May be constructed using the '^' operator.
2417 """
def __init__(self, exprs, savelist=False):
    """Build a longest-match alternation over the given expressions.

    mayReturnEmpty is True when at least one alternative may return empty.
    """
    super(Or, self).__init__(exprs, savelist)
    emptyCapable = [alt for alt in self.exprs if alt.mayReturnEmpty]
    self.mayReturnEmpty = bool(emptyCapable)
2425
def parseImpl(self, instring, loc, doActions=True):
    """Try every alternative and parse with the one that matches furthest.

    Each alternative is first probed with tryParse; on a tie the earliest
    alternative wins.  If none matches, the failure that got furthest into
    the input is raised (an IndexError counts as a failure at the end of
    the string).
    """
    furthestFailLoc = -1
    furthestFailure = None
    longestEnd = -1
    longestExpr = None
    for alt in self.exprs:
        try:
            endLoc = alt.tryParse(instring, loc)
        except ParseException:
            failure = sys.exc_info()[1]
            if failure.loc > furthestFailLoc:
                furthestFailure = failure
                furthestFailLoc = failure.loc
        except IndexError:
            strEnd = len(instring)
            if strEnd > furthestFailLoc:
                furthestFailure = ParseException(instring, strEnd, alt.errmsg, self)
                furthestFailLoc = strEnd
        else:
            if endLoc > longestEnd:
                longestEnd = endLoc
                longestExpr = alt

    if longestEnd >= 0:
        return longestExpr._parse(instring, loc, doActions)
    if furthestFailure is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    raise furthestFailure
2454
2456 if isinstance( other, basestring ):
2457 other = Literal( other )
2458 return self.append( other )
2459
2461 if hasattr(self,"name"):
2462 return self.name
2463
2464 if self.strRepr is None:
2465 self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2466
2467 return self.strRepr
2468
2470 subRecCheckList = parseElementList[:] + [ self ]
2471 for e in self.exprs:
2472 e.checkRecursion( subRecCheckList )
2473
2474
2476 """Requires that at least one ParseExpression is found.
2477 If two expressions match, the first one listed is the one that will match.
2478 May be constructed using the '|' operator.
2479 """
def __init__(self, exprs, savelist=False):
    """Build a first-match alternation over the given expressions.

    When the exprs argument is empty/false, mayReturnEmpty is True;
    otherwise it is True when at least one alternative may return empty.
    """
    super(MatchFirst, self).__init__(exprs, savelist)
    if not exprs:
        self.mayReturnEmpty = True
        return
    emptyCapable = [alt for alt in self.exprs if alt.mayReturnEmpty]
    self.mayReturnEmpty = bool(emptyCapable)
2490
def parseImpl(self, instring, loc, doActions=True):
    """Return the result of the first alternative that matches.

    If no alternative matches, the failure that got furthest into the
    input is raised (an IndexError counts as a failure at the end of the
    string); with no alternatives at all a generic ParseException is
    raised.
    """
    maxExcLoc = -1
    maxException = None
    for e in self.exprs:
        try:
            return e._parse(instring, loc, doActions)
        except ParseException:
            # fetch the exception via sys.exc_info() - the same
            # version-neutral idiom used by And and Or - instead of the
            # Python-2-only "except ParseException, err" syntax
            err = sys.exc_info()[1]
            if err.loc > maxExcLoc:
                maxException = err
                maxExcLoc = err.loc
        except IndexError:
            if len(instring) > maxExcLoc:
                maxException = ParseException(instring, len(instring), e.errmsg, self)
                maxExcLoc = len(instring)

    # only reached if no expression matched
    if maxException is not None:
        raise maxException
    raise ParseException(instring, loc, "no defined alternatives to match", self)
2513
2515 if isinstance( other, basestring ):
2516 other = Literal( other )
2517 return self.append( other )
2518
2520 if hasattr(self,"name"):
2521 return self.name
2522
2523 if self.strRepr is None:
2524 self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2525
2526 return self.strRepr
2527
2529 subRecCheckList = parseElementList[:] + [ self ]
2530 for e in self.exprs:
2531 e.checkRecursion( subRecCheckList )
2532
2533
2534 -class Each(ParseExpression):
2535 """Requires all given ParseExpressions to be found, but in any order.
2536 Expressions may be separated by whitespace.
2537 May be constructed using the '&' operator.
2538 """
def __init__(self, exprs, savelist=True):
    """Build an any-order group over the given expressions.

    mayReturnEmpty is True only if every contained expression may return
    empty.  The expression groups used by parseImpl are computed lazily,
    flagged by initExprGroups.
    """
    super(Each, self).__init__(exprs, savelist)
    mustConsume = [e for e in self.exprs if not e.mayReturnEmpty]
    self.mayReturnEmpty = not mustConsume
    self.skipWhitespace = True
    self.initExprGroups = True
2548
def parseImpl(self, instring, loc, doActions=True):
    """Match all contained expressions, in whatever order they occur.

    A first pass uses tryParse to discover the order in which expressions
    match; a second pass re-parses in that order (running parse actions if
    doActions is set) and merges the results.  Raises ParseException if a
    required expression never matched.
    """
    if self.initExprGroups:
        # one-time classification of the contained expressions
        wrappedOptionals = [e.expr for e in self.exprs if isinstance(e, Optional)]
        emptyCapable = [e for e in self.exprs
                        if e.mayReturnEmpty and e not in wrappedOptionals]
        self.optionals = wrappedOptionals + emptyCapable
        self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
        self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
        self.required = [e for e in self.exprs
                         if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
        self.required += self.multirequired
        self.initExprGroups = False

    scanLoc = loc
    pendingReqd = self.required[:]
    pendingOpt = self.optionals[:]
    matchOrder = []

    # keep sweeping over the candidates until a sweep matches nothing
    while True:
        candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
        failCount = 0
        for e in candidates:
            try:
                scanLoc = e.tryParse(instring, scanLoc)
            except ParseException:
                failCount += 1
            else:
                matchOrder.append(e)
                if e in pendingReqd:
                    pendingReqd.remove(e)
                elif e in pendingOpt:
                    pendingOpt.remove(e)
        if failCount == len(candidates):
            break

    if pendingReqd:
        missing = ", ".join([_ustr(e) for e in pendingReqd])
        raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

    # add any unmatched Optionals, in case they have default values defined
    matchOrder += [e for e in self.exprs
                   if isinstance(e, Optional) and e.expr in pendingOpt]

    # second pass: parse for real, in the discovered order
    parsedResults = []
    for e in matchOrder:
        loc, results = e._parse(instring, loc, doActions)
        parsedResults.append(results)

    finalResults = ParseResults([])
    for r in parsedResults:
        # names already present are combined rather than overwritten
        repeated = {}
        for name in r.keys():
            if name in finalResults.keys():
                combined = ParseResults(finalResults[name])
                combined += ParseResults(r[name])
                repeated[name] = combined
        finalResults += ParseResults(r)
        for name, value in repeated.items():
            finalResults[name] = value
    return loc, finalResults
2606
2608 if hasattr(self,"name"):
2609 return self.name
2610
2611 if self.strRepr is None:
2612 self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2613
2614 return self.strRepr
2615
2617 subRecCheckList = parseElementList[:] + [ self ]
2618 for e in self.exprs:
2619 e.checkRecursion( subRecCheckList )
2620
2621
2623 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
def __init__(self, expr, savelist=False):
    """Wrap a single expression (a string is converted to a Literal).

    When an expression is given, its index-error/empty-match flags,
    whitespace settings, list-saving flag, pre-parse flag and ignore
    expressions are copied onto this wrapper.
    """
    super(ParseElementEnhance, self).__init__(savelist)
    if isinstance(expr, basestring):
        expr = Literal(expr)
    self.expr = expr
    self.strRepr = None
    if expr is None:
        return

    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    # setWhitespaceChars is called before skipWhitespace is copied, as in
    # the original statement order
    self.setWhitespaceChars(expr.whiteChars)
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)
2638
def parseImpl(self, instring, loc, doActions=True):
    """Delegate parsing to the wrapped expression.

    Raises ParseException if no expression has been set.  The exception
    carries the actual input string (not an empty string) so that its
    location can be related to the text being parsed.
    """
    if self.expr is not None:
        return self.expr._parse(instring, loc, doActions, callPreParse=False)
    raise ParseException(instring, loc, self.errmsg, self)
2644
2646 self.skipWhitespace = False
2647 self.expr = self.expr.copy()
2648 if self.expr is not None:
2649 self.expr.leaveWhitespace()
2650 return self
2651
2653 if isinstance( other, Suppress ):
2654 if other not in self.ignoreExprs:
2655 super( ParseElementEnhance, self).ignore( other )
2656 if self.expr is not None:
2657 self.expr.ignore( self.ignoreExprs[-1] )
2658 else:
2659 super( ParseElementEnhance, self).ignore( other )
2660 if self.expr is not None:
2661 self.expr.ignore( self.ignoreExprs[-1] )
2662 return self
2663
2669
2671 if self in parseElementList:
2672 raise RecursiveGrammarException( parseElementList+[self] )
2673 subRecCheckList = parseElementList[:] + [ self ]
2674 if self.expr is not None:
2675 self.expr.checkRecursion( subRecCheckList )
2676
def validate(self, validateTrace=None):
    """Validate the wrapped expression, then check this one for recursion.

    validateTrace is the list of expressions already on the validation
    path; it is copied, never modified.  It defaults to an empty trace
    (None is used as the default instead of a shared mutable list).
    """
    if validateTrace is None:
        validateTrace = []
    tmp = validateTrace[:] + [self]
    if self.expr is not None:
        self.expr.validate(tmp)
    self.checkRecursion([])
2682
2684 try:
2685 return super(ParseElementEnhance,self).__str__()
2686 except:
2687 pass
2688
2689 if self.strRepr is None and self.expr is not None:
2690 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
2691 return self.strRepr
2692
2693
2695 """Lookahead matching of the given parse expression. FollowedBy
2696 does *not* advance the parsing position within the input string, it only
2697 verifies that the specified parse expression matches at the current
2698 position. FollowedBy always returns a null token list."""
2702
def parseImpl(self, instring, loc, doActions=True):
    """Check that the wrapped expression matches at loc, consuming nothing.

    Any exception raised by the lookahead propagates to the caller; on
    success the location is returned unchanged with an empty token list.
    """
    lookahead = self.expr
    lookahead.tryParse(instring, loc)
    noTokens = []
    return loc, noTokens
2706
2707
2708 -class NotAny(ParseElementEnhance):
2709 """Lookahead to disallow matching with the given parse expression. NotAny
2710 does *not* advance the parsing position within the input string, it only
2711 verifies that the specified parse expression does *not* match at the current
2712 position. Also, NotAny does *not* skip over leading whitespace. NotAny
2713 always returns a null token list. May be constructed using the '~' operator."""
2715 super(NotAny,self).__init__(expr)
2716
2717 self.skipWhitespace = False
2718 self.mayReturnEmpty = True
2719 self.errmsg = "Found unwanted token, "+_ustr(self.expr)
2720
2721
def parseImpl(self, instring, loc, doActions=True):
    """Succeed (consuming nothing) only if the wrapped expression fails at loc.

    If the wrapped expression does match, the element's shared exception
    object is updated and raised.
    """
    try:
        self.expr.tryParse(instring, loc)
    except (ParseException, IndexError):
        # the unwanted expression is absent: negative lookahead succeeds
        return loc, []

    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
2734
2736 if hasattr(self,"name"):
2737 return self.name
2738
2739 if self.strRepr is None:
2740 self.strRepr = "~{" + _ustr(self.expr) + "}"
2741
2742 return self.strRepr
2743
2744
2746 """Optional repetition of zero or more of the given expression."""
2750
def parseImpl(self, instring, loc, doActions=True):
    """Match the wrapped expression as many times as possible (zero allowed).

    Returns the location after the last successful match and the tokens
    collected so far; a failure of the very first attempt yields the
    original location and an empty list.
    """
    tokens = []
    try:
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        skipIgnorables = (len(self.ignoreExprs) > 0)
        while True:
            nextLoc = loc
            if skipIgnorables:
                nextLoc = self._skipIgnorables(instring, loc)
            loc, moreTokens = self.expr._parse(instring, nextLoc, doActions)
            if moreTokens or moreTokens.keys():
                tokens += moreTokens
    except (ParseException, IndexError):
        # the repetition ends at the first failed attempt
        pass

    return loc, tokens
2768
2770 if hasattr(self,"name"):
2771 return self.name
2772
2773 if self.strRepr is None:
2774 self.strRepr = "[" + _ustr(self.expr) + "]..."
2775
2776 return self.strRepr
2777
2782
2783
2785 """Repetition of one or more of the given expression."""
def parseImpl(self, instring, loc, doActions=True):
    """Match the wrapped expression one or more times.

    The first match is mandatory: its exception, if any, propagates to the
    caller.  Further matches are attempted until one fails.
    """
    # must be at least one; a failure here is not caught
    loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
    try:
        skipIgnorables = (len(self.ignoreExprs) > 0)
        while True:
            nextLoc = loc
            if skipIgnorables:
                nextLoc = self._skipIgnorables(instring, loc)
            loc, moreTokens = self.expr._parse(instring, nextLoc, doActions)
            if moreTokens or moreTokens.keys():
                tokens += moreTokens
    except (ParseException, IndexError):
        # the repetition ends at the first failed attempt
        pass

    return loc, tokens
2803
2805 if hasattr(self,"name"):
2806 return self.name
2807
2808 if self.strRepr is None:
2809 self.strRepr = "{" + _ustr(self.expr) + "}..."
2810
2811 return self.strRepr
2812
2817
2824
2825 _optionalNotMatched = _NullToken()
2827 """Optional matching of the given expression.
2828 A default return string can also be specified, if the optional expression
2829 is not found.
2830 """
2832 super(Optional,self).__init__( exprs, savelist=False )
2833 self.defaultValue = default
2834 self.mayReturnEmpty = True
2835
def parseImpl(self, instring, loc, doActions=True):
    """Match the wrapped expression if possible, otherwise supply a default.

    When the expression fails, the location is left unchanged and the
    tokens are: an empty list if no default was given; otherwise the
    default value, stored under the wrapped expression's results name if
    it has one.
    """
    try:
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
    except (ParseException, IndexError):
        default = self.defaultValue
        if default is _optionalNotMatched:
            tokens = []
        elif not self.expr.resultsName:
            tokens = [default]
        else:
            tokens = ParseResults([default])
            tokens[self.expr.resultsName] = default
    return loc, tokens
2849
2851 if hasattr(self,"name"):
2852 return self.name
2853
2854 if self.strRepr is None:
2855 self.strRepr = "[" + _ustr(self.expr) + "]"
2856
2857 return self.strRepr
2858
2859
2860 -class SkipTo(ParseElementEnhance):
2861 """Token for skipping over all undefined text until the matched expression is found.
2862 If include is set to true, the matched expression is also parsed (the skipped text
2863 and matched expression are returned as a 2-element list). The ignore
2864 argument is used to define grammars (typically quoted strings and comments) that
2865 might contain false matches.
2866 """
def __init__(self, other, include=False, ignore=None, failOn=None):
    """Set up skipping of input text up to the target expression.

    other   - target expression (wrapped by the base class)
    include - stored as includeMatch
    ignore  - stored as ignoreExpr
    failOn  - expression on which skipping should fail; a string is
              converted to a Literal
    """
    super(SkipTo, self).__init__(other)
    self.includeMatch = include
    self.ignoreExpr = ignore
    if isinstance(failOn, basestring):
        failOn = Literal(failOn)
    self.failOn = failOn
    self.mayReturnEmpty = True
    self.mayIndexError = False
    self.asList = False
    self.errmsg = "No match found for " + _ustr(self.expr)
2879
2880
def parseImpl(self, instring, loc, doActions=True):
    """Advance through instring until the target expression matches.

    Returns the skipped text as a one-element token list; with
    includeMatch set, the target is parsed as well and any tokens it
    produces are appended to the skipped text.  If failOn matches at a
    scanned position the resulting ParseException propagates; if the
    target is never found the element's shared exception is raised.
    """
    startLoc = loc
    instrlen = len(instring)
    target = self.expr
    failParse = False
    while loc <= instrlen:
        try:
            if self.failOn:
                try:
                    self.failOn.tryParse(instring, loc)
                except ParseBaseException:
                    pass
                else:
                    # flag the failure so the handler below re-raises it
                    failParse = True
                    raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                failParse = False
            if self.ignoreExpr is not None:
                # step over every ignorable match found at this position
                while True:
                    try:
                        loc = self.ignoreExpr.tryParse(instring, loc)
                    except ParseBaseException:
                        break
            # probe for the target without running parse actions
            target._parse(instring, loc, doActions=False, callPreParse=False)
            skipText = instring[startLoc:loc]
            if not self.includeMatch:
                return loc, [skipText]
            loc, mat = target._parse(instring, loc, doActions, callPreParse=False)
            if not mat:
                return loc, [skipText]
            skipRes = ParseResults(skipText)
            skipRes += mat
            return loc, [skipRes]
        except (ParseException, IndexError):
            if failParse:
                raise
            loc += 1
    exc = self.myException
    exc.loc = loc
    exc.pstr = instring
    raise exc
2925
2926 -class Forward(ParseElementEnhance):
2927 """Forward declaration of an expression to be defined later -
2928 used for recursive grammars, such as algebraic infix notation.
2929 When the expression is known, it is assigned to the Forward variable using the '<<' operator.
2930
2931 Note: take care when assigning to Forward not to overlook precedence of operators.
2932 Specifically, '|' has a lower precedence than '<<', so that::
2933 fwdExpr << a | b | c
2934 will actually be evaluated as::
2935 (fwdExpr << a) | b | c
2936 thereby leaving b and c out as parseable alternatives. It is recommended that you
2937 explicitly group the values inserted into the Forward::
2938 fwdExpr << (a | b | c)
2939 """
2942
2944 if isinstance( other, basestring ):
2945 other = Literal(other)
2946 self.expr = other
2947 self.mayReturnEmpty = other.mayReturnEmpty
2948 self.strRepr = None
2949 self.mayIndexError = self.expr.mayIndexError
2950 self.mayReturnEmpty = self.expr.mayReturnEmpty
2951 self.setWhitespaceChars( self.expr.whiteChars )
2952 self.skipWhitespace = self.expr.skipWhitespace
2953 self.saveAsList = self.expr.saveAsList
2954 self.ignoreExprs.extend(self.expr.ignoreExprs)
2955 return None
2956
2958 self.skipWhitespace = False
2959 return self
2960
2962 if not self.streamlined:
2963 self.streamlined = True
2964 if self.expr is not None:
2965 self.expr.streamline()
2966 return self
2967
def validate(self, validateTrace=None):
    """Validate the assigned expression once per validation path.

    If this element is already in validateTrace, the contained expression
    is not validated again.  The recursion check always runs.
    validateTrace is copied, never modified; it defaults to an empty trace
    (None is used as the default instead of a shared mutable list).
    """
    if validateTrace is None:
        validateTrace = []
    if self not in validateTrace:
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
    self.checkRecursion([])
2974
2976 if hasattr(self,"name"):
2977 return self.name
2978
2979 self._revertClass = self.__class__
2980 self.__class__ = _ForwardNoRecurse
2981 try:
2982 if self.expr is not None:
2983 retString = _ustr(self.expr)
2984 else:
2985 retString = "None"
2986 finally:
2987 self.__class__ = self._revertClass
2988 return self.__class__.__name__ + ": " + retString
2989
2991 if self.expr is not None:
2992 return super(Forward,self).copy()
2993 else:
2994 ret = Forward()
2995 ret << self
2996 return ret
2997
3001
3003 """Abstract subclass of ParseExpression, for converting parsed results."""
3004 - def __init__( self, expr, savelist=False ):
3007
3008 -class Upcase(TokenConverter):
3009 """Converter to upper case all matching tokens."""
3011 super(Upcase,self).__init__(*args)
3012 warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
3013 DeprecationWarning,stacklevel=2)
3014
def postParse(self, instring, loc, tokenlist):
    """Return a list with every token converted to upper case.

    Uses each token's own upper() method rather than the deprecated
    string.upper module function, which does not exist in Python 3.
    """
    return [tok.upper() for tok in tokenlist]
3017
3018
3020 """Converter to concatenate all matching tokens to a single string.
3021 By default, the matching patterns must also be contiguous in the input string;
3022 this can be disabled by specifying 'adjacent=False' in the constructor.
3023 """
def __init__(self, expr, joinString="", adjacent=True):
    """Wrap expr so that its tokens are concatenated into a single string.

    joinString is stored for use when the tokens are joined.  With
    adjacent set, leaveWhitespace() is called on this element; the Combine
    itself then has skipWhitespace re-enabled.
    """
    super(Combine, self).__init__(expr)
    self.joinString = joinString
    self.adjacent = adjacent
    if adjacent:
        # suppress whitespace-stripping in the contained expressions ...
        self.leaveWhitespace()
    # ... but re-enable it on the Combine itself
    self.skipWhitespace = True
3032
3039
def postParse(self, instring, loc, tokenlist):
    """Replace the matched tokens with their concatenation.

    A copy of tokenlist is emptied of its tokens and given the single
    joined string.  If this element has a results name and the copy holds
    named results, the copy is returned wrapped in a list.
    """
    combined = tokenlist.copy()
    del combined[:]
    joinedText = "".join(tokenlist._asStringList(self.joinString))
    combined += ParseResults([joinedText], modal=self.modalResults)

    if self.resultsName and len(combined.keys()) > 0:
        return [combined]
    return combined
3049
3050 -class Group(TokenConverter):
3051 """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
3053 super(Group,self).__init__( expr )
3054 self.saveAsList = True
3055
def postParse(self, instring, loc, tokenlist):
    """Return the matched tokens nested inside a one-element list."""
    grouped = [tokenlist]
    return grouped
3058
3059 -class Dict(TokenConverter):
3060 """Converter to return a repetitive expression as a list, but also as a dictionary.
3061 Each element can also be referenced using the first token in the expression as its key.
3062 Useful for tabular report scraping when the first column can be used as a item key.
3063 """
3065 super(Dict,self).__init__( exprs )
3066 self.saveAsList = True
3067
def postParse(self, instring, loc, tokenlist):
    """Register each non-empty entry of tokenlist under its first token.

    The key is the entry's first token (an int key is converted to a
    stripped string).  The stored value is "" for a one-token entry, the
    second token for a two-token entry whose second token is not a
    ParseResults, and otherwise the remainder of the entry (unwrapped when
    it is a single token without named results).  The result is wrapped in
    a list when this element has a results name.
    """
    for offset, entry in enumerate(tokenlist):
        entryLen = len(entry)
        if entryLen == 0:
            continue

        key = entry[0]
        if isinstance(key, int):
            key = _ustr(entry[0]).strip()

        if entryLen == 1:
            value = ""
        elif entryLen == 2 and not isinstance(entry[1], ParseResults):
            value = entry[1]
        else:
            remainder = entry.copy()
            del remainder[0]
            if len(remainder) != 1 or (isinstance(remainder, ParseResults) and remainder.keys()):
                value = remainder
            else:
                value = remainder[0]
        tokenlist[key] = _ParseResultsWithOffset(value, offset)

    if self.resultsName:
        return [tokenlist]
    return tokenlist
3091
3092
3094 """Converter for ignoring the results of a parsed expression."""
3095 - def postParse( self, instring, loc, tokenlist ):
3097
3100
3101
3103 """Wrapper for parse actions, to ensure they are only called once."""
3105 self.callable = ParserElement._normalizeParseActionArgs(methodCall)
3106 self.called = False
3108 if not self.called:
3109 results = self.callable(s,l,t)
3110 self.called = True
3111 return results
3112 raise ParseException(s,l,"")
3115
3117 """Decorator for debugging parse actions."""
3118 f = ParserElement._normalizeParseActionArgs(f)
3119 def z(*paArgs):
3120 thisFunc = f.func_name
3121 s,l,t = paArgs[-3:]
3122 if len(paArgs)>3:
3123 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
3124 sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
3125 try:
3126 ret = f(*paArgs)
3127 except Exception:
3128 exc = sys.exc_info()[1]
3129 sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
3130 raise
3131 sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
3132 return ret
3133 try:
3134 z.__name__ = f.__name__
3135 except AttributeError:
3136 pass
3137 return z
3138
3139
3140
3141
3143 """Helper to define a delimited list of expressions - the delimiter defaults to ','.
3144 By default, the list elements and delimiters can have intervening whitespace, and
3145 comments, but this can be overridden by passing 'combine=True' in the constructor.
3146 If combine is set to True, the matching tokens are returned as a single token
3147 string, with the delimiters included; otherwise, the matching tokens are returned
3148 as a list of tokens, with the delimiters suppressed.
3149 """
3150 dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
3151 if combine:
3152 return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
3153 else:
3154 return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
3155
3157 """Helper to define a counted list of expressions.
3158 This helper defines a pattern of the form::
3159 integer expr expr expr...
3160 where the leading integer tells how many expr expressions follow.
3161 The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
3162 """
3163 arrayExpr = Forward()
3164 def countFieldParseAction(s,l,t):
3165 n = int(t[0])
3166 arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
3167 return []
3168 return ( Word(nums).setName("arrayLen").setParseAction(countFieldParseAction, callDuringTry=True) + arrayExpr )
3169
3171 if type(L) is not list: return [L]
3172 if L == []: return L
3173 return _flatten(L[0]) + _flatten(L[1:])
3174
3176 """Helper to define an expression that is indirectly defined from
3177 the tokens matched in a previous expression, that is, it looks
3178 for a 'repeat' of a previous expression. For example::
3179 first = Word(nums)
3180 second = matchPreviousLiteral(first)
3181 matchExpr = first + ":" + second
3182 will match "1:1", but not "1:2". Because this matches a
3183 previous literal, will also match the leading "1:1" in "1:10".
3184 If this is not desired, use matchPreviousExpr.
3185 Do *not* use with packrat parsing enabled.
3186 """
3187 rep = Forward()
3188 def copyTokenToRepeater(s,l,t):
3189 if t:
3190 if len(t) == 1:
3191 rep << t[0]
3192 else:
3193
3194 tflat = _flatten(t.asList())
3195 rep << And( [ Literal(tt) for tt in tflat ] )
3196 else:
3197 rep << Empty()
3198 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3199 return rep
3200
3202 """Helper to define an expression that is indirectly defined from
3203 the tokens matched in a previous expression, that is, it looks
3204 for a 'repeat' of a previous expression. For example::
3205 first = Word(nums)
3206 second = matchPreviousExpr(first)
3207 matchExpr = first + ":" + second
3208 will match "1:1", but not "1:2". Because this matches by
3209 expressions, will *not* match the leading "1:1" in "1:10";
3210 the expressions are evaluated first, and then compared, so
3211 "1" is compared with "10".
3212 Do *not* use with packrat parsing enabled.
3213 """
3214 rep = Forward()
3215 e2 = expr.copy()
3216 rep << e2
3217 def copyTokenToRepeater(s,l,t):
3218 matchTokens = _flatten(t.asList())
3219 def mustMatchTheseTokens(s,l,t):
3220 theseTokens = _flatten(t.asList())
3221 if theseTokens != matchTokens:
3222 raise ParseException("",0,"")
3223 rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
3224 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3225 return rep
3226
3228
3229 for c in r"\^-]":
3230 s = s.replace(c,_bslash+c)
3231 s = s.replace("\n",r"\n")
3232 s = s.replace("\t",r"\t")
3233 return _ustr(s)
3234
def oneOf(strs, caseless=False, useRegex=True):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a MatchFirst for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a list of string literals
     - caseless - (default=False) - treat all literals as caseless
     - useRegex - (default=True) - as an optimization, will generate a Regex
       object; otherwise, will generate a MatchFirst object (if caseless=True, or
       if creating a Regex raises an exception)

    An argument that is neither a string nor a list/tuple produces a
    SyntaxWarning and is treated as an empty set of alternatives.
    """
    if caseless:
        isequal = (lambda a, b: a.upper() == b.upper())
        masks = (lambda a, b: b.upper().startswith(a.upper()))
        parseElementClass = CaselessLiteral
    else:
        isequal = (lambda a, b: a == b)
        masks = (lambda a, b: b.startswith(a))
        parseElementClass = Literal

    if isinstance(strs, (list, tuple)):
        symbols = list(strs[:])
    elif isinstance(strs, basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                      SyntaxWarning, stacklevel=2)
        # without this, the code below failed with a NameError on 'symbols'
        symbols = []

    # remove duplicates, and move any symbol ahead of a shorter symbol that
    # is a prefix of it, so the longer alternative is tested first
    i = 0
    while i < len(symbols) - 1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i+1:]):
            if isequal(other, cur):
                del symbols[i+j+1]
                break
            elif masks(cur, other):
                del symbols[i+j+1]
                symbols.insert(i, other)
                cur = other
                break
        else:
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols) == len("".join(symbols)):
                # all symbols are single characters: use a character class
                return Regex("[%s]" % "".join([_escapeRegexRangeChars(sym) for sym in symbols]))
            else:
                return Regex("|".join([re.escape(sym) for sym in symbols]))
        except Exception:
            # 'except Exception' rather than a bare except, so that
            # KeyboardInterrupt/SystemExit are not swallowed
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                          SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst([parseElementClass(sym) for sym in symbols])
3293
3295 """Helper to easily and clearly define a dictionary by specifying the respective patterns
3296 for the key and value. Takes care of defining the Dict, ZeroOrMore, and Group tokens
3297 in the proper order. The key pattern can include delimiting markers or punctuation,
3298 as long as they are suppressed, thereby leaving the significant key text. The value
3299 pattern can include named results, so that the Dict results can include named token
3300 fields.
3301 """
3302 return Dict( ZeroOrMore( Group ( key + value ) ) )
3303
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.

    The expression is bracketed by two empty marker expressions that record
    the input location before and after the match, under the results names
    "_original_start" and "_original_end"; a parse action then slices the
    input string between those locations.

    With asString true (the default) the parse action returns that slice
    as a plain string.  With asString false the tokens are replaced in
    place by the slice and the two marker names are deleted, so any other
    results names defined inside expr are preserved.
    """
    def reportLocation(s, loc, t):
        return loc

    locMarker = Empty().setParseAction(reportLocation).leaveWhitespace()
    startMarker = locMarker("_original_start")
    endMarker = locMarker("_original_end")
    matchExpr = startMarker + expr + endMarker

    if asString:
        def extractText(s, l, t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s, l, t):
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            del t["_original_start"]
            del t["_original_end"]

    matchExpr.setParseAction(extractText)
    return matchExpr
3330
3331
# Shared, pre-built instances of Empty, LineStart, LineEnd, StringStart and
# StringEnd, each given a display name via setName().
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")
3337
# Grammar for regex-style '[...]' character set definitions; _reBracketExpr and
# _expanded are what srange() uses to expand such a definition.

# _bslash followed by exactly one of the listed punctuation characters; the
# parse action keeps only the second character (the escaped one).
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# All printables except the backslash and "]" (the two characters in r"\]").
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# _bslash + "0x" (suppressed) then hex digits, converted to the character with that code.
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# _bslash (suppressed) then a "0"-led run of octal digits, converted to the character with that code.
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
# One member of the set: any escape form above, else a single unescaped printable.
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
# "a-z" style range: grouped pair of single characters, with the dash suppressed.
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# Whole bracket expression; an optional leading "^" is stored as "negate" and
# the grouped list of ranges/characters as "body".
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# Expand one parsed body item: a ParseResults (a range pair) becomes the string
# of every character from p[0] through p[1] inclusive; anything else is returned
# unchanged.  Because of the and/or form, a range that expands to the empty
# string (start after end) also falls through and returns p itself.
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
3347
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       Returns the expanded character string, or "" if s cannot be parsed or
       expanded.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # Deliberate best-effort: any parse or expansion error yields an empty
        # set.  Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate instead of being swallowed.
        return ""
3368
3370 """Helper method for defining parse actions that require matching at a specific
3371 column in the input text.
3372 """
3373 def verifyCol(strg,locn,toks):
3374 if col(locn,strg) != n:
3375 raise ParseException(strg,locn,"matched token not at column %d" % n)
3376 return verifyCol
3377
3379 """Helper method for common parse actions that simply return a literal value. Especially
3380 useful when used with transformString().
3381 """
3382 def _replFunc(*args):
3383 return [replStr]
3384 return _replFunc
3385
3387 """Helper parse action for removing quotation marks from parsed quoted strings.
3388 To use, add this parse action to quoted string using::
3389 quotedString.setParseAction( removeQuotes )
3390 """
3391 return t[0][1:-1]
3392
3394 """Helper parse action to convert tokens to upper case."""
3395 return [ tt.upper() for tt in map(_ustr,t) ]
3396
3398 """Helper parse action to convert tokens to lower case."""
3399 return [ tt.lower() for tt in map(_ustr,t) ]
3400
def keepOriginalText(s,startLoc,t):
    """DEPRECATED - use new helper method 'originalTextFor'.
       Parse action that replaces the matched tokens with the original input
       text they were parsed from, overriding any nested parse actions.

       The end of the match is obtained from getTokensEndLoc(), so this must be
       invoked as a parse action.  Returns t, emptied and refilled with the
       text s[startLoc:endloc].
    """
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        # NOTE(review): getTokensEndLoc as visible in this file raises
        # ParseFatalException; whether this handler can catch it depends on the
        # exception hierarchy defined elsewhere - confirm.
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:endloc]
    del t[:]
    t += ParseResults(originalText)
    return t
3412
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.

       Walks up the call stack, starting two frames above this one, looking for
       a frame whose function is named "_parseNoCache", and returns the value of
       that frame's local variable "loc".  Raises ParseFatalException if no such
       frame is found.
    """
    import inspect
    fstack = inspect.stack()
    try:
        # Skip this frame and the immediate caller, then search the remaining
        # callers for the parsing routine.
        for frameRecord in fstack[2:]:
            if frameRecord[3] != "_parseNoCache":
                continue
            return frameRecord[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # Release the frame records explicitly, whichever way we leave.
        del fstack
3428
3456
3460
3464
def withAttribute(*args,**attrDict):
    """Helper that builds a validating parse action for start tags created with
       makeXMLTags or makeHTMLTags.  Attach it to a start tag expression to
       require particular attribute values, avoiding false matches on common
       tags such as <TD> or <DIV>.

       The required attributes may be given either as:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a series of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       Names containing a namespace prefix must use the tuple form.  If any
       positional tuples are given, the keyword arguments are not used.

       To require only that an attribute is present, whatever its value, pass
       withAttribute.ANY_VALUE as the value.

       The returned parse action raises ParseException if a required attribute
       is absent from the tokens or has a different value.

       NOTE(review): earlier documentation states attribute names are matched
       case-insensitively; the check here is a plain 'in' test on the tokens, so
       any case folding would have to come from how the tag tokens are named -
       confirm.
    """
    if args:
        pairs = args[:]
    else:
        pairs = attrDict.items()
    # Snapshot as a list of 2-tuples; also rejects entries that are not pairs.
    required = [(name,value) for name,value in pairs]

    def pa(s,l,tokens):
        for attrName,attrValue in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                if tokens[attrName] != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, tokens[attrName], attrValue))
    return pa

# Unique sentinel meaning "attribute must exist, any value accepted".
withAttribute.ANY_VALUE = object()
3495
# Associativity markers for operator definitions: LEFT and RIGHT are distinct
# sentinel objects that an operator's rightLeftAssoc value is compared against.
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
3499
3501 """Helper method for constructing grammars of expressions made up of
3502 operators working in a precedence hierarchy. Operators may be unary or
3503 binary, left- or right-associative. Parse actions can also be attached
3504 to operator expressions.
3505
3506 Parameters:
3507 - baseExpr - expression representing the most basic element for the nested
3508 - opList - list of tuples, one for each operator precedence level in the
3509 expression grammar; each tuple is of the form
3510 (opExpr, numTerms, rightLeftAssoc, parseAction), where:
3511 - opExpr is the pyparsing expression for the operator;
3512 may also be a string, which will be converted to a Literal;
3513 if numTerms is 3, opExpr is a tuple of two expressions, for the
3514 two operators separating the 3 terms
3515 - numTerms is the number of terms for this operator (must
3516 be 1, 2, or 3)
3517 - rightLeftAssoc is the indicator whether the operator is
3518 right or left associative, using the pyparsing-defined
3519 constants opAssoc.RIGHT and opAssoc.LEFT.
3520 - parseAction is the parse action to be associated with
3521 expressions matching this operator expression (the
3522 parse action tuple member may be omitted)
3523 """
3524 ret = Forward()
3525 lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
3526 for i,operDef in enumerate(opList):
3527 opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
3528 if arity == 3:
3529 if opExpr is None or len(opExpr) != 2:
3530 raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
3531 opExpr1, opExpr2 = opExpr
3532 thisExpr = Forward()
3533 if rightLeftAssoc == opAssoc.LEFT:
3534 if arity == 1:
3535 matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
3536 elif arity == 2:
3537 if opExpr is not None:
3538 matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
3539 else:
3540 matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
3541 elif arity == 3:
3542 matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
3543 Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
3544 else:
3545 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
3546 elif rightLeftAssoc == opAssoc.RIGHT:
3547 if arity == 1:
3548
3549 if not isinstance(opExpr, Optional):
3550 opExpr = Optional(opExpr)
3551 matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
3552 elif arity == 2:
3553 if opExpr is not None:
3554 matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
3555 else:
3556 matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
3557 elif arity == 3:
3558 matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
3559 Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
3560 else:
3561 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
3562 else:
3563 raise ValueError("operator must indicate right or left associativity")
3564 if pa:
3565 matchExpr.setParseAction( pa )
3566 thisExpr << ( matchExpr | lastExpr )
3567 lastExpr = thisExpr
3568 ret << lastExpr
3569 return ret
3570
# Quoted-string expressions.  Inside the quotes each pattern accepts: any
# character other than the quote itself, newline, carriage return or backslash;
# a doubled quote character; a \x hex escape; or a backslash followed by any
# single character.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# Either of the two forms above, combined into a single regular expression.
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# A quoted string with a leading 'u', combined into one token; built on a copy
# of quotedString.  (_L is defined elsewhere in the file - presumably an alias
# for Literal; confirm.)
unicodeString = Combine(_L('u') + quotedString.copy())
3575
3577 """Helper method for defining nested lists enclosed in opening and closing
3578 delimiters ("(" and ")" are the default).
3579
3580 Parameters:
3581 - opener - opening character for a nested list (default="("); can also be a pyparsing expression
3582 - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
3583 - content - expression for items within the nested lists (default=None)
3584 - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)
3585
3586 If an expression is not provided for the content argument, the nested
3587 expression will capture all whitespace-delimited content between delimiters
3588 as a list of separate values.
3589
3590 Use the ignoreExpr argument to define expressions that may contain
3591 opening or closing characters that should not be treated as opening
3592 or closing characters for nesting, such as quotedString or a comment
3593 expression. Specify multiple expressions using an Or or MatchFirst.
3594 The default is quotedString, but if no expressions are to be ignored,
3595 then pass None for this argument.
3596 """
3597 if opener == closer:
3598 raise ValueError("opening and closing strings cannot be the same")
3599 if content is None:
3600 if isinstance(opener,basestring) and isinstance(closer,basestring):
3601 if len(opener) == 1 and len(closer)==1:
3602 if ignoreExpr is not None:
3603 content = (Combine(OneOrMore(~ignoreExpr +
3604 CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3605 ).setParseAction(lambda t:t[0].strip()))
3606 else:
3607 content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
3608 ).setParseAction(lambda t:t[0].strip()))
3609 else:
3610 if ignoreExpr is not None:
3611 content = (Combine(OneOrMore(~ignoreExpr +
3612 ~Literal(opener) + ~Literal(closer) +
3613 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3614 ).setParseAction(lambda t:t[0].strip()))
3615 else:
3616 content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
3617 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3618 ).setParseAction(lambda t:t[0].strip()))
3619 else:
3620 raise ValueError("opening and closing arguments must be strings if no content expression is given")
3621 ret = Forward()
3622 if ignoreExpr is not None:
3623 ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
3624 else:
3625 ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
3626 return ret
3627
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one blockStatement.

       As a side effect, blockStatementExpr is told to ignore a backslash
       followed by a line end.
    """
    def checkPeerIndent(s,l,t):
        # At end of input there is nothing to check.
        if l < len(s):
            column = col(l,s)
            expected = indentStack[-1]
            # Deeper than the current level is a hard error; any other
            # mismatch is an ordinary failure.
            if column > expected:
                raise ParseFatalException(s,l,"illegal nesting")
            if column != expected:
                raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        column = col(l,s)
        if column <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # Entering a deeper level: remember its column.
        indentStack.append( column )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        column = col(l,s)
        isUnindent = indentStack and column < indentStack[-1] and column <= indentStack[-2]
        if not isUnindent:
            raise ParseException(s,l,"not an unindent")
        # Leaving the block: discard its column.
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)

    # One or more statements at the current level, each optionally followed by
    # line ends.
    blockBody = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + blockBody + UNDENT )
    else:
        smExpr = Group( Optional(NL) + blockBody )

    # Registered only after the block expression has been assembled.
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
3679
# Character sets built with srange from hex-escaped code points:
# 0xc0-0xd6, 0xd8-0xf6, 0xf8-0xff for alphas8bit; 0xa1-0xbf, 0xd7, 0xf7 for punc8bit.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Open/close tag expressions for any tag whose name starts with a letter and
# continues with alphanumerics, "_" or ":".
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
# "&" + one of gt/lt/amp/nbsp/quot + ";" as a single token; the entity name is
# stored under the results name "entity".
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
# Entity name -> replacement character, paired positionally with '><& "'.
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# Parse action: the mapped character for t.entity, or None when it is not in the map.
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None

# "/*" through the first following "*/".
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# "<!--" through the first following "-->", newlines included.
htmlComment = Regex(r"<!--[\s\S]*?-->")
# Everything up to the end of the current line, with leaveWhitespace() applied.
restOfLine = Regex(r".*").leaveWhitespace()
# "//" to end of line; a backslash-newline pair is consumed as part of the comment.
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# Either a "/* ... */" comment or a "//" comment, in one regular expression.
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# Same expression object as cppStyleComment, under another name.
javaStyleComment = cppStyleComment
# "#" to end of line.
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# All printables except the comma.
_noncomma = "".join( [ c for c in printables if c != "," ] )
# One unquoted list item: runs of non-comma printables; a run of spaces/tabs
# after a word is kept only when it is not followed by a comma or a line end.
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# Delimited list of quoted strings or unquoted items; a missing item yields "".
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
3703
3704
if __name__ == "__main__":
    # Self-test: parse a handful of simple SQL SELECT statements, some of them
    # deliberately malformed, and print either the results or the error location.

    def test( teststring ):
        """Parse teststring with simpleSQL and print the outcome.

        On success prints the token list, the tokens, the named "columns" and
        "tables" results and an XML rendering; on a parse error prints the
        offending line with a caret under the error column.
        """
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->" + str(tokenlist))
            print ("tokens = " + str(tokens))
            print ("tokens.columns = " + str(tokens.columns))
            print ("tokens.tables = " + str(tokens.tables))
            print (tokens.asXML("SQL",True))
        except ParseBaseException:
            # sys.exc_info() keeps this handler valid on both Python 2 and 3.
            err = sys.exc_info()[1]
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)
        # Blank separator line.  A bare print() would output "()" under the
        # Python 2 print statement, so print an empty string instead.
        print ("")

    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )
    simpleSQL = ( selectToken + \
                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
                     fromToken + \
                     tableNameList.setResultsName( "tables" ) )

    testStrings = (
        "SELECT * from XYZZY, ABC",
        "select * from SYS.XYZZY",
        "Select A from Sys.dual",
        "Select AA,BB,CC from Sys.dual",
        "Select A, B, C from Sys.dual",
        # NOTE(review): identical to the previous case as seen here; the
        # original spacing may have differed - confirm.
        "Select A, B, C from Sys.dual",
        "Xelect A, B, C from Sys.dual",
        "Select A, B, C frox Sys.dual",
        "Select",
        "Select ^^^ frox Sys.dual",
        "Select A, B, C from Sys.dual, Table2 ",
    )
    for testString in testStrings:
        test( testString )
3748