Coverage for src/prisma/_vendor/lark_schema_scan_parser.py: 47%
2127 statements
coverage.py v7.2.7, created at 2024-08-27 18:25 +0000
1# The file was automatically generated by Lark v1.1.8
2__version__ = "1.1.8"
4#
5#
6# Lark Stand-alone Generator Tool
7# ----------------------------------
8# Generates a stand-alone LALR(1) parser
9#
10# Git: https://github.com/erezsh/lark
11# Author: Erez Shinan (erezshin@gmail.com)
12#
13#
14# >>> LICENSE
15#
16# This tool and its generated code use a separate license from Lark,
17# and are subject to the terms of the Mozilla Public License, v. 2.0.
18# If a copy of the MPL was not distributed with this
19# file, You can obtain one at https://mozilla.org/MPL/2.0/.
20#
21# If you wish to purchase a commercial license for this tool and its
22# generated code, you may contact me via email or otherwise.
23#
24# If MPL2 is incompatible with your free or open-source project,
25# contact me and we'll work it out.
26#
27#
29from copy import deepcopy
30from abc import ABC, abstractmethod
31from types import ModuleType
32from typing import (
33 TypeVar, Generic, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
34 Union, Iterable, IO, TYPE_CHECKING, overload, Sequence,
35 Pattern as REPattern, ClassVar, Set, Mapping
36)
39class LarkError(Exception):
40 pass
43class ConfigurationError(LarkError, ValueError):
44 pass
47def assert_config(value, options: Collection, msg='Got %r, expected one of %s'):
48 if value not in options:  # coverage: branch 48 ↛ 49 never taken (condition was never true)
49 raise ConfigurationError(msg % (value, options))
52class GrammarError(LarkError):
53 pass
56class ParseError(LarkError):
57 pass
60class LexError(LarkError):
61 pass
63T = TypeVar('T')
65class UnexpectedInput(LarkError):
66 #--
67 line: int
68 column: int
69 pos_in_stream = None
70 state: Any
71 _terminals_by_name = None
72 interactive_parser: 'InteractiveParser'
74 def get_context(self, text: str, span: int=40) -> str:
75 #--
76 assert self.pos_in_stream is not None, self
77 pos = self.pos_in_stream
78 start = max(pos - span, 0)
79 end = pos + span
80 if not isinstance(text, bytes):  # coverage: branch 80 ↛ 85 never taken (condition was never false)
81 before = text[start:pos].rsplit('\n', 1)[-1]
82 after = text[pos:end].split('\n', 1)[0]
83 return before + after + '\n' + ' ' * len(before.expandtabs()) + '^\n'
84 else:
85 before = text[start:pos].rsplit(b'\n', 1)[-1]
86 after = text[pos:end].split(b'\n', 1)[0]
87 return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace")
89 def match_examples(self, parse_fn: 'Callable[[str], Tree]',
90 examples: Union[Mapping[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
91 token_type_match_fallback: bool=False,
92 use_accepts: bool=True
93 ) -> Optional[T]:
94 #--
95 assert self.state is not None, "Not supported for this exception"
97 if isinstance(examples, Mapping):
98 examples = examples.items()
100 candidate = (None, False)
101 for i, (label, example) in enumerate(examples):
102 assert not isinstance(example, str), "Expecting a list"
104 for j, malformed in enumerate(example):
105 try:
106 parse_fn(malformed)
107 except UnexpectedInput as ut:
108 if ut.state == self.state:
109 if (
110 use_accepts
111 and isinstance(self, UnexpectedToken)
112 and isinstance(ut, UnexpectedToken)
113 and ut.accepts != self.accepts
114 ):
115 logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
116 (self.state, self.accepts, ut.accepts, i, j))
117 continue
118 if (
119 isinstance(self, (UnexpectedToken, UnexpectedEOF))
120 and isinstance(ut, (UnexpectedToken, UnexpectedEOF))
121 ):
122 if ut.token == self.token: ##
124 logger.debug("Exact Match at example [%s][%s]" % (i, j))
125 return label
127 if token_type_match_fallback:
128 ##
130 if (ut.token.type == self.token.type) and not candidate[-1]:
131 logger.debug("Token Type Fallback at example [%s][%s]" % (i, j))
132 candidate = label, True
134 if candidate[0] is None:
135 logger.debug("Same State match at example [%s][%s]" % (i, j))
136 candidate = label, False
138 return candidate[0]
140 def _format_expected(self, expected):
141 if self._terminals_by_name:
142 d = self._terminals_by_name
143 expected = [d[t_name].user_repr() if t_name in d else t_name for t_name in expected]
144 return "Expected one of: \n\t* %s\n" % '\n\t* '.join(expected)
147class UnexpectedEOF(ParseError, UnexpectedInput):
148 #--
149 expected: 'List[Token]'
151 def __init__(self, expected, state=None, terminals_by_name=None):
152 super(UnexpectedEOF, self).__init__()
154 self.expected = expected
155 self.state = state
156 from .lexer import Token
157 self.token = Token("<EOF>", "") ##
159 self.pos_in_stream = -1
160 self.line = -1
161 self.column = -1
162 self._terminals_by_name = terminals_by_name
165 def __str__(self):
166 message = "Unexpected end-of-input. "
167 message += self._format_expected(self.expected)
168 return message
171class UnexpectedCharacters(LexError, UnexpectedInput):
172 #--
174 allowed: Set[str]
175 considered_tokens: Set[Any]
177 def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None,
178 terminals_by_name=None, considered_rules=None):
179 super(UnexpectedCharacters, self).__init__()
181 ##
183 self.line = line
184 self.column = column
185 self.pos_in_stream = lex_pos
186 self.state = state
187 self._terminals_by_name = terminals_by_name
189 self.allowed = allowed
190 self.considered_tokens = considered_tokens
191 self.considered_rules = considered_rules
192 self.token_history = token_history
194 if isinstance(seq, bytes):  # coverage: branch 194 ↛ 195 never taken (condition was never true)
195 self.char = seq[lex_pos:lex_pos + 1].decode("ascii", "backslashreplace")
196 else:
197 self.char = seq[lex_pos]
198 self._context = self.get_context(seq)
201 def __str__(self):
202 message = "No terminal matches '%s' in the current parser context, at line %d col %d" % (self.char, self.line, self.column)
203 message += '\n\n' + self._context
204 if self.allowed:
205 message += self._format_expected(self.allowed)
206 if self.token_history:
207 message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in self.token_history)
208 return message
211class UnexpectedToken(ParseError, UnexpectedInput):
212 #--
214 expected: Set[str]
215 considered_rules: Set[str]
217 def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None):
218 super(UnexpectedToken, self).__init__()
220 ##
222 self.line = getattr(token, 'line', '?')
223 self.column = getattr(token, 'column', '?')
224 self.pos_in_stream = getattr(token, 'start_pos', None)
225 self.state = state
227 self.token = token
228 self.expected = expected ##
230 self._accepts = NO_VALUE
231 self.considered_rules = considered_rules
232 self.interactive_parser = interactive_parser
233 self._terminals_by_name = terminals_by_name
234 self.token_history = token_history
237 @property
238 def accepts(self) -> Set[str]:
239 if self._accepts is NO_VALUE:
240 self._accepts = self.interactive_parser and self.interactive_parser.accepts()
241 return self._accepts
243 def __str__(self):
244 message = ("Unexpected token %r at line %s, column %s.\n%s"
245 % (self.token, self.line, self.column, self._format_expected(self.accepts or self.expected)))
246 if self.token_history:
247 message += "Previous tokens: %r\n" % self.token_history
249 return message
253class VisitError(LarkError):
254 #--
256 obj: 'Union[Tree, Token]'
257 orig_exc: Exception
259 def __init__(self, rule, obj, orig_exc):
260 message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc)
261 super(VisitError, self).__init__(message)
263 self.rule = rule
264 self.obj = obj
265 self.orig_exc = orig_exc
268class MissingVariableError(LarkError):
269 pass
272import sys, re
273import logging
275logger: logging.Logger = logging.getLogger("lark")
276logger.addHandler(logging.StreamHandler())
277##
279##
281logger.setLevel(logging.CRITICAL)
284NO_VALUE = object()
286T = TypeVar("T")
289def classify(seq: Iterable, key: Optional[Callable] = None, value: Optional[Callable] = None) -> Dict:
290 d: Dict[Any, Any] = {}
291 for item in seq:
292 k = key(item) if (key is not None) else item
293 v = value(item) if (value is not None) else item
294 try:
295 d[k].append(v)
296 except KeyError:
297 d[k] = [v]
298 return d
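# --- Illustrative example (editor's note, not part of the generated parser) ---
# classify() groups items into a dict of lists, keyed by the `key` callable:
#
#   >>> classify(['foo', 'bar', 'baz'], key=lambda s: s[0])
#   {'f': ['foo'], 'b': ['bar', 'baz']}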
301def _deserialize(data: Any, namespace: Dict[str, Any], memo: Dict) -> Any:
302 if isinstance(data, dict):
303 if '__type__' in data: ##
305 class_ = namespace[data['__type__']]
306 return class_.deserialize(data, memo)
307 elif '@' in data:
308 return memo[data['@']]
309 return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
310 elif isinstance(data, list):
311 return [_deserialize(value, namespace, memo) for value in data]
312 return data
315_T = TypeVar("_T", bound="Serialize")
317class Serialize:
318 #--
320 def memo_serialize(self, types_to_memoize: List) -> Any:
321 memo = SerializeMemoizer(types_to_memoize)
322 return self.serialize(memo), memo.serialize()
324 def serialize(self, memo = None) -> Dict[str, Any]:
325 if memo and memo.in_types(self):
326 return {'@': memo.memoized.get(self)}
328 fields = getattr(self, '__serialize_fields__')
329 res = {f: _serialize(getattr(self, f), memo) for f in fields}
330 res['__type__'] = type(self).__name__
331 if hasattr(self, '_serialize'):
332 self._serialize(res, memo) ##
334 return res
336 @classmethod
337 def deserialize(cls: Type[_T], data: Dict[str, Any], memo: Dict[int, Any]) -> _T:
338 namespace = getattr(cls, '__serialize_namespace__', [])
339 namespace = {c.__name__:c for c in namespace}
341 fields = getattr(cls, '__serialize_fields__')
343 if '@' in data:
344 return memo[data['@']]
346 inst = cls.__new__(cls)
347 for f in fields:
348 try:
349 setattr(inst, f, _deserialize(data[f], namespace, memo))
350 except KeyError as e:
351 raise KeyError("Cannot find key for class", cls, e)
353 if hasattr(inst, '_deserialize'):
354 inst._deserialize() ##
357 return inst
360class SerializeMemoizer(Serialize):
361 #--
363 __serialize_fields__ = 'memoized',
365 def __init__(self, types_to_memoize: List) -> None:
366 self.types_to_memoize = tuple(types_to_memoize)
367 self.memoized = Enumerator()
369 def in_types(self, value: Serialize) -> bool:
370 return isinstance(value, self.types_to_memoize)
372 def serialize(self) -> Dict[int, Any]: ##
374 return _serialize(self.memoized.reversed(), None)
376 @classmethod
377 def deserialize(cls, data: Dict[int, Any], namespace: Dict[str, Any], memo: Dict[Any, Any]) -> Dict[int, Any]: ##
379 return _deserialize(data, namespace, memo)
382try:
383 import regex
384 _has_regex = True
385except ImportError:
386 _has_regex = False
388if sys.version_info >= (3, 11):
389 import re._parser as sre_parse
390 import re._constants as sre_constants
391else:
392 import sre_parse
393 import sre_constants
395categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
397def get_regexp_width(expr: str) -> Union[Tuple[int, int], List[int]]:
398 if _has_regex:
399 ##
401 ##
403 ##
405 regexp_final = re.sub(categ_pattern, 'A', expr)
406 else:
407 if re.search(categ_pattern, expr):
408 raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
409 regexp_final = expr
410 try:
411 ##
413 return [int(x) for x in sre_parse.parse(regexp_final).getwidth()] ##
415 except sre_constants.error:
416 if not _has_regex:
417 raise ValueError(expr)
418 else:
419 ##
421 ##
423 c = regex.compile(regexp_final)
424 if c.match('') is None:
425 ##
427 return 1, int(sre_constants.MAXREPEAT)
428 else:
429 return 0, int(sre_constants.MAXREPEAT)
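# --- Illustrative example (editor's note, not part of the generated parser) ---
# get_regexp_width() reports the minimum and maximum length a regexp can match;
# the lexer uses it to reject zero-width terminals and to sort by max_width:
#
#   >>> get_regexp_width('ab?')
#   [1, 2]
#   >>> get_regexp_width('a*')   # min width 0 would be rejected as a terminal
#   [0, 4294967295]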
432from collections import OrderedDict
434class Meta:
436 empty: bool
437 line: int
438 column: int
439 start_pos: int
440 end_line: int
441 end_column: int
442 end_pos: int
443 orig_expansion: 'List[TerminalDef]'
444 match_tree: bool
446 def __init__(self):
447 self.empty = True
450_Leaf_T = TypeVar("_Leaf_T")
451Branch = Union[_Leaf_T, 'Tree[_Leaf_T]']
454class Tree(Generic[_Leaf_T]):
455 #--
457 data: str
458 children: 'List[Branch[_Leaf_T]]'
460 def __init__(self, data: str, children: 'List[Branch[_Leaf_T]]', meta: Optional[Meta]=None) -> None:
461 self.data = data
462 self.children = children
463 self._meta = meta
465 @property
466 def meta(self) -> Meta:
467 if self._meta is None:
468 self._meta = Meta()
469 return self._meta
471 def __repr__(self):
472 return 'Tree(%r, %r)' % (self.data, self.children)
474 def _pretty_label(self):
475 return self.data
477 def _pretty(self, level, indent_str):
478 yield f'{indent_str*level}{self._pretty_label()}'
479 if len(self.children) == 1 and not isinstance(self.children[0], Tree):
480 yield f'\t{self.children[0]}\n'
481 else:
482 yield '\n'
483 for n in self.children:
484 if isinstance(n, Tree):
485 yield from n._pretty(level+1, indent_str)
486 else:
487 yield f'{indent_str*(level+1)}{n}\n'
489 def pretty(self, indent_str: str=' ') -> str:
490 #--
491 return ''.join(self._pretty(0, indent_str))
493 def __rich__(self, parent:Optional['rich.tree.Tree']=None) -> 'rich.tree.Tree':
494 #--
495 return self._rich(parent)
497 def _rich(self, parent):
498 if parent:
499 tree = parent.add(f'[bold]{self.data}[/bold]')
500 else:
501 import rich.tree
502 tree = rich.tree.Tree(self.data)
504 for c in self.children:
505 if isinstance(c, Tree):
506 c._rich(tree)
507 else:
508 tree.add(f'[green]{c}[/green]')
510 return tree
512 def __eq__(self, other):
513 try:
514 return self.data == other.data and self.children == other.children
515 except AttributeError:
516 return False
518 def __ne__(self, other):
519 return not (self == other)
521 def __hash__(self) -> int:
522 return hash((self.data, tuple(self.children)))
524 def iter_subtrees(self) -> 'Iterator[Tree[_Leaf_T]]':
525 #--
526 queue = [self]
527 subtrees = OrderedDict()
528 for subtree in queue:
529 subtrees[id(subtree)] = subtree
530 ##
532 queue += [c for c in reversed(subtree.children) ##
534 if isinstance(c, Tree) and id(c) not in subtrees]
536 del queue
537 return reversed(list(subtrees.values()))
539 def iter_subtrees_topdown(self):
540 #--
541 stack = [self]
542 stack_append = stack.append
543 stack_pop = stack.pop
544 while stack:
545 node = stack_pop()
546 if not isinstance(node, Tree):
547 continue
548 yield node
549 for child in reversed(node.children):
550 stack_append(child)
552 def find_pred(self, pred: 'Callable[[Tree[_Leaf_T]], bool]') -> 'Iterator[Tree[_Leaf_T]]':
553 #--
554 return filter(pred, self.iter_subtrees())
556 def find_data(self, data: str) -> 'Iterator[Tree[_Leaf_T]]':
557 #--
558 return self.find_pred(lambda t: t.data == data)
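# --- Illustrative example (editor's note, not part of the generated parser) ---
# A small sketch of working with Tree: pretty-printing and searching subtrees by
# rule name (the rule and token names here are made up for illustration):
#
#   t = Tree('model', [Token('NAME', 'User'), Tree('field', [Token('NAME', 'id')])])
#   print(t.pretty())
#   # model
#   #   User
#   #   field   id
#   fields = list(t.find_data('field'))   # -> [Tree('field', [Token('NAME', 'id')])]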
561from functools import wraps, update_wrapper
562from inspect import getmembers, getmro
564_Return_T = TypeVar('_Return_T')
565_Return_V = TypeVar('_Return_V')
566_Leaf_T = TypeVar('_Leaf_T')
567_Leaf_U = TypeVar('_Leaf_U')
568_R = TypeVar('_R')
569_FUNC = Callable[..., _Return_T]
570_DECORATED = Union[_FUNC, type]
572class _DiscardType:
573 #--
575 def __repr__(self):
576 return "lark.visitors.Discard"
578Discard = _DiscardType()
580##
583class _Decoratable:
584 #--
586 @classmethod
587 def _apply_v_args(cls, visit_wrapper):
588 mro = getmro(cls)
589 assert mro[0] is cls
590 libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)}
591 for name, value in getmembers(cls):
593 ##
595 if name.startswith('_') or (name in libmembers and name not in cls.__dict__):
596 continue
597 if not callable(value):
598 continue
600 ##
602 if isinstance(cls.__dict__[name], _VArgsWrapper):
603 continue
605 setattr(cls, name, _VArgsWrapper(cls.__dict__[name], visit_wrapper))
606 return cls
608 def __class_getitem__(cls, _):
609 return cls
612class Transformer(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]):
613 #--
614 __visit_tokens__ = True ##
617 def __init__(self, visit_tokens: bool=True) -> None:
618 self.__visit_tokens__ = visit_tokens
620 def _call_userfunc(self, tree, new_children=None):
621 ##
623 children = new_children if new_children is not None else tree.children
624 try:
625 f = getattr(self, tree.data)
626 except AttributeError:
627 return self.__default__(tree.data, children, tree.meta)
628 else:
629 try:
630 wrapper = getattr(f, 'visit_wrapper', None)
631 if wrapper is not None:
632 return f.visit_wrapper(f, tree.data, children, tree.meta)
633 else:
634 return f(children)
635 except GrammarError:
636 raise
637 except Exception as e:
638 raise VisitError(tree.data, tree, e)
640 def _call_userfunc_token(self, token):
641 try:
642 f = getattr(self, token.type)
643 except AttributeError:
644 return self.__default_token__(token)
645 else:
646 try:
647 return f(token)
648 except GrammarError:
649 raise
650 except Exception as e:
651 raise VisitError(token.type, token, e)
653 def _transform_children(self, children):
654 for c in children:
655 if isinstance(c, Tree):
656 res = self._transform_tree(c)
657 elif self.__visit_tokens__ and isinstance(c, Token):
658 res = self._call_userfunc_token(c)
659 else:
660 res = c
662 if res is not Discard:
663 yield res
665 def _transform_tree(self, tree):
666 children = list(self._transform_children(tree.children))
667 return self._call_userfunc(tree, children)
669 def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
670 #--
671 return self._transform_tree(tree)
673 def __mul__(
674 self: 'Transformer[_Leaf_T, Tree[_Leaf_U]]',
675 other: 'Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V,]]'
676 ) -> 'TransformerChain[_Leaf_T, _Return_V]':
677 #--
678 return TransformerChain(self, other)
680 def __default__(self, data, children, meta):
681 #--
682 return Tree(data, children, meta)
684 def __default_token__(self, token):
685 #--
686 return token
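# --- Illustrative example (editor's note, not part of the generated parser) ---
# Transformer methods are looked up by rule name and receive the already-transformed
# children; a minimal sketch (rule names are assumptions, not the real grammar):
#
#   class ToDict(Transformer):
#       def pair(self, children):
#           key, value = children
#           return (str(key), str(value))
#       def start(self, children):
#           return dict(children)
#
#   # ToDict().transform(parse_tree) would then return a plain dict.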
689def merge_transformers(base_transformer=None, **transformers_to_merge):
690 #--
691 if base_transformer is None:
692 base_transformer = Transformer()
693 for prefix, transformer in transformers_to_merge.items():
694 for method_name in dir(transformer):
695 method = getattr(transformer, method_name)
696 if not callable(method):
697 continue
698 if method_name.startswith("_") or method_name == "transform":
699 continue
700 prefixed_method = prefix + "__" + method_name
701 if hasattr(base_transformer, prefixed_method):
702 raise AttributeError("Cannot merge: method '%s' appears more than once" % prefixed_method)
704 setattr(base_transformer, prefixed_method, method)
706 return base_transformer
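# --- Illustrative example (editor's note, not part of the generated parser) ---
# merge_transformers() copies every public method of each keyword-argument
# transformer onto the base, prefixed with "<name>__", so rules imported from a
# sub-grammar can be dispatched to their own handler; a sketch with made-up names:
#
#   merged = merge_transformers(MainTransformer(), csv=CsvTransformer())
#   # Trees named "csv__row" are now handled by the copied "csv__row" method,
#   # i.e. by CsvTransformer.row.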
709class InlineTransformer(Transformer): ##
711 def _call_userfunc(self, tree, new_children=None):
712 ##
714 children = new_children if new_children is not None else tree.children
715 try:
716 f = getattr(self, tree.data)
717 except AttributeError:
718 return self.__default__(tree.data, children, tree.meta)
719 else:
720 return f(*children)
723class TransformerChain(Generic[_Leaf_T, _Return_T]):
725 transformers: 'Tuple[Union[Transformer, TransformerChain], ...]'
727 def __init__(self, *transformers: 'Union[Transformer, TransformerChain]') -> None:
728 self.transformers = transformers
730 def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
731 for t in self.transformers:
732 tree = t.transform(tree)
733 return cast(_Return_T, tree)
735 def __mul__(
736 self: 'TransformerChain[_Leaf_T, Tree[_Leaf_U]]',
737 other: 'Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V]]'
738 ) -> 'TransformerChain[_Leaf_T, _Return_V]':
739 return TransformerChain(*self.transformers + (other,))
742class Transformer_InPlace(Transformer[_Leaf_T, _Return_T]):
743 #--
744 def _transform_tree(self, tree): ##
746 return self._call_userfunc(tree)
748 def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
749 for subtree in tree.iter_subtrees():
750 subtree.children = list(self._transform_children(subtree.children))
752 return self._transform_tree(tree)
755class Transformer_NonRecursive(Transformer[_Leaf_T, _Return_T]):
756 #--
758 def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
759 ##
761 rev_postfix = []
762 q: List[Branch[_Leaf_T]] = [tree]
763 while q:
764 t = q.pop()
765 rev_postfix.append(t)
766 if isinstance(t, Tree):
767 q += t.children
769 ##
771 stack: List = []
772 for x in reversed(rev_postfix):
773 if isinstance(x, Tree):
774 size = len(x.children)
775 if size:
776 args = stack[-size:]
777 del stack[-size:]
778 else:
779 args = []
781 res = self._call_userfunc(x, args)
782 if res is not Discard:
783 stack.append(res)
785 elif self.__visit_tokens__ and isinstance(x, Token):
786 res = self._call_userfunc_token(x)
787 if res is not Discard:
788 stack.append(res)
789 else:
790 stack.append(x)
792 result, = stack ##
794 ##
796 ##
798 ##
800 return cast(_Return_T, result)
803class Transformer_InPlaceRecursive(Transformer):
804 #--
805 def _transform_tree(self, tree):
806 tree.children = list(self._transform_children(tree.children))
807 return self._call_userfunc(tree)
810##
813class VisitorBase:
814 def _call_userfunc(self, tree):
815 return getattr(self, tree.data, self.__default__)(tree)
817 def __default__(self, tree):
818 #--
819 return tree
821 def __class_getitem__(cls, _):
822 return cls
825class Visitor(VisitorBase, ABC, Generic[_Leaf_T]):
826 #--
828 def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
829 #--
830 for subtree in tree.iter_subtrees():
831 self._call_userfunc(subtree)
832 return tree
834 def visit_topdown(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
835 #--
836 for subtree in tree.iter_subtrees_topdown():
837 self._call_userfunc(subtree)
838 return tree
841class Visitor_Recursive(VisitorBase, Generic[_Leaf_T]):
842 #--
844 def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
845 #--
846 for child in tree.children:
847 if isinstance(child, Tree):
848 self.visit(child)
850 self._call_userfunc(tree)
851 return tree
853 def visit_topdown(self,tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
854 #--
855 self._call_userfunc(tree)
857 for child in tree.children:
858 if isinstance(child, Tree):
859 self.visit_topdown(child)
861 return tree
864class Interpreter(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]):
865 #--
867 def visit(self, tree: Tree[_Leaf_T]) -> _Return_T:
868 ##
870 ##
872 ##
874 return self._visit_tree(tree)
876 def _visit_tree(self, tree: Tree[_Leaf_T]):
877 f = getattr(self, tree.data)
878 wrapper = getattr(f, 'visit_wrapper', None)
879 if wrapper is not None:
880 return f.visit_wrapper(f, tree.data, tree.children, tree.meta)
881 else:
882 return f(tree)
884 def visit_children(self, tree: Tree[_Leaf_T]) -> List:
885 return [self._visit_tree(child) if isinstance(child, Tree) else child
886 for child in tree.children]
888 def __getattr__(self, name):
889 return self.__default__
891 def __default__(self, tree):
892 return self.visit_children(tree)
895_InterMethod = Callable[[Type[Interpreter], _Return_T], _R]
897def visit_children_decor(func: _InterMethod) -> _InterMethod:
898 #--
899 @wraps(func)
900 def inner(cls, tree):
901 values = cls.visit_children(tree)
902 return func(cls, values)
903 return inner
905##
908def _apply_v_args(obj, visit_wrapper):
909 try:
910 _apply = obj._apply_v_args
911 except AttributeError:
912 return _VArgsWrapper(obj, visit_wrapper)
913 else:
914 return _apply(visit_wrapper)
917class _VArgsWrapper:
918 #--
919 base_func: Callable
921 def __init__(self, func: Callable, visit_wrapper: Callable[[Callable, str, list, Any], Any]):
922 if isinstance(func, _VArgsWrapper):
923 func = func.base_func
924 ##
926 self.base_func = func ##
928 self.visit_wrapper = visit_wrapper
929 update_wrapper(self, func)
931 def __call__(self, *args, **kwargs):
932 return self.base_func(*args, **kwargs)
934 def __get__(self, instance, owner=None):
935 try:
936 ##
938 ##
940 g = type(self.base_func).__get__
941 except AttributeError:
942 return self
943 else:
944 return _VArgsWrapper(g(self.base_func, instance, owner), self.visit_wrapper)
946 def __set_name__(self, owner, name):
947 try:
948 f = type(self.base_func).__set_name__
949 except AttributeError:
950 return
951 else:
952 f(self.base_func, owner, name)
955def _vargs_inline(f, _data, children, _meta):
956 return f(*children)
957def _vargs_meta_inline(f, _data, children, meta):
958 return f(meta, *children)
959def _vargs_meta(f, _data, children, meta):
960 return f(meta, children)
961def _vargs_tree(f, data, children, meta):
962 return f(Tree(data, children, meta))
965def v_args(inline: bool = False, meta: bool = False, tree: bool = False, wrapper: Optional[Callable] = None) -> Callable[[_DECORATED], _DECORATED]:
966 #--
967 if tree and (meta or inline):
968 raise ValueError("Visitor functions cannot combine 'tree' with 'meta' or 'inline'.")
970 func = None
971 if meta:
972 if inline:
973 func = _vargs_meta_inline
974 else:
975 func = _vargs_meta
976 elif inline:
977 func = _vargs_inline
978 elif tree:
979 func = _vargs_tree
981 if wrapper is not None:
982 if func is not None:
983 raise ValueError("Cannot use 'wrapper' along with 'tree', 'meta' or 'inline'.")
984 func = wrapper
986 def _visitor_args_dec(obj):
987 return _apply_v_args(obj, func)
988 return _visitor_args_dec
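# --- Illustrative example (editor's note, not part of the generated parser) ---
# v_args() changes how children are passed to visitor/transformer callbacks:
# inline=True unpacks them as positional arguments, meta=True prepends the Meta
# object. A minimal sketch:
#
#   @v_args(inline=True)
#   class Calc(Transformer):
#       def add(self, left, right):   # children arrive unpacked
#           return left + right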
992TOKEN_DEFAULT_PRIORITY = 0
995class Symbol(Serialize):
996 __slots__ = ('name',)
998 name: str
999 is_term: ClassVar[bool] = NotImplemented
1001 def __init__(self, name: str) -> None:
1002 self.name = name
1004 def __eq__(self, other):
1005 assert isinstance(other, Symbol), other
1006 return self.is_term == other.is_term and self.name == other.name
1008 def __ne__(self, other):
1009 return not (self == other)
1011 def __hash__(self):
1012 return hash(self.name)
1014 def __repr__(self):
1015 return '%s(%r)' % (type(self).__name__, self.name)
1017 fullrepr = property(__repr__)
1019 def renamed(self, f):
1020 return type(self)(f(self.name))
1023class Terminal(Symbol):
1024 __serialize_fields__ = 'name', 'filter_out'
1026 is_term: ClassVar[bool] = True
1028 def __init__(self, name, filter_out=False):
1029 self.name = name
1030 self.filter_out = filter_out
1032 @property
1033 def fullrepr(self):
1034 return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)
1036 def renamed(self, f):
1037 return type(self)(f(self.name), self.filter_out)
1040class NonTerminal(Symbol):
1041 __serialize_fields__ = 'name',
1043 is_term: ClassVar[bool] = False
1046class RuleOptions(Serialize):
1047 __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices'
1049 keep_all_tokens: bool
1050 expand1: bool
1051 priority: Optional[int]
1052 template_source: Optional[str]
1053 empty_indices: Tuple[bool, ...]
1055 def __init__(self, keep_all_tokens: bool=False, expand1: bool=False, priority: Optional[int]=None, template_source: Optional[str]=None, empty_indices: Tuple[bool, ...]=()) -> None:
1056 self.keep_all_tokens = keep_all_tokens
1057 self.expand1 = expand1
1058 self.priority = priority
1059 self.template_source = template_source
1060 self.empty_indices = empty_indices
1062 def __repr__(self):
1063 return 'RuleOptions(%r, %r, %r, %r)' % (
1064 self.keep_all_tokens,
1065 self.expand1,
1066 self.priority,
1067 self.template_source
1068 )
1071class Rule(Serialize):
1072 #--
1073 __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')
1075 __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
1076 __serialize_namespace__ = Terminal, NonTerminal, RuleOptions
1078 origin: NonTerminal
1079 expansion: Sequence[Symbol]
1080 order: int
1081 alias: Optional[str]
1082 options: RuleOptions
1083 _hash: int
1085 def __init__(self, origin: NonTerminal, expansion: Sequence[Symbol],
1086 order: int=0, alias: Optional[str]=None, options: Optional[RuleOptions]=None):
1087 self.origin = origin
1088 self.expansion = expansion
1089 self.alias = alias
1090 self.order = order
1091 self.options = options or RuleOptions()
1092 self._hash = hash((self.origin, tuple(self.expansion)))
1094 def _deserialize(self):
1095 self._hash = hash((self.origin, tuple(self.expansion)))
1097 def __str__(self):
1098 return '<%s : %s>' % (self.origin.name, ' '.join(x.name for x in self.expansion))
1100 def __repr__(self):
1101 return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options)
1103 def __hash__(self):
1104 return self._hash
1106 def __eq__(self, other):
1107 if not isinstance(other, Rule):  # coverage: branch 1107 ↛ 1109 never taken (condition was never false)
1108 return False
1109 return self.origin == other.origin and self.expansion == other.expansion
1113from copy import copy
1115try: ##
1117 has_interegular = bool(interegular)
1118except NameError:
1119 has_interegular = False
1121class Pattern(Serialize, ABC):
1122 #--
1124 value: str
1125 flags: Collection[str]
1126 raw: Optional[str]
1127 type: ClassVar[str]
1129 def __init__(self, value: str, flags: Collection[str] = (), raw: Optional[str] = None) -> None:
1130 self.value = value
1131 self.flags = frozenset(flags)
1132 self.raw = raw
1134 def __repr__(self):
1135 return repr(self.to_regexp())
1137 ##
1139 def __hash__(self):
1140 return hash((type(self), self.value, self.flags))
1142 def __eq__(self, other):
1143 return type(self) == type(other) and self.value == other.value and self.flags == other.flags
1145 @abstractmethod
1146 def to_regexp(self) -> str:
1147 raise NotImplementedError()
1149 @property
1150 @abstractmethod
1151 def min_width(self) -> int:
1152 raise NotImplementedError()
1154 @property
1155 @abstractmethod
1156 def max_width(self) -> int:
1157 raise NotImplementedError()
1159 def _get_flags(self, value):
1160 for f in self.flags:  # coverage: branch 1160 ↛ 1161 never taken (loop body never started)
1161 value = ('(?%s:%s)' % (f, value))
1162 return value
1165class PatternStr(Pattern):
1166 __serialize_fields__ = 'value', 'flags', 'raw'
1168 type: ClassVar[str] = "str"
1170 def to_regexp(self) -> str:
1171 return self._get_flags(re.escape(self.value))
1173 @property
1174 def min_width(self) -> int:
1175 return len(self.value)
1177 @property
1178 def max_width(self) -> int:
1179 return len(self.value)
1182class PatternRE(Pattern):
1183 __serialize_fields__ = 'value', 'flags', 'raw', '_width'
1185 type: ClassVar[str] = "re"
1187 def to_regexp(self) -> str:
1188 return self._get_flags(self.value)
1190 _width = None
1191 def _get_width(self):
1192 if self._width is None:  # coverage: branch 1192 ↛ 1193 never taken (condition was never true)
1193 self._width = get_regexp_width(self.to_regexp())
1194 return self._width
1196 @property
1197 def min_width(self) -> int:
1198 return self._get_width()[0]
1200 @property
1201 def max_width(self) -> int:
1202 return self._get_width()[1]
1205class TerminalDef(Serialize):
1206 #--
1207 __serialize_fields__ = 'name', 'pattern', 'priority'
1208 __serialize_namespace__ = PatternStr, PatternRE
1210 name: str
1211 pattern: Pattern
1212 priority: int
1214 def __init__(self, name: str, pattern: Pattern, priority: int = TOKEN_DEFAULT_PRIORITY) -> None:
1215 assert isinstance(pattern, Pattern), pattern
1216 self.name = name
1217 self.pattern = pattern
1218 self.priority = priority
1220 def __repr__(self):
1221 return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)
1223 def user_repr(self) -> str:
1224 if self.name.startswith('__'): ##
1226 return self.pattern.raw or self.name
1227 else:
1228 return self.name
1230_T = TypeVar('_T', bound="Token")
1232class Token(str):
1233 #--
1234 __slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')
1236 __match_args__ = ('type', 'value')
1238 type: str
1239 start_pos: Optional[int]
1240 value: Any
1241 line: Optional[int]
1242 column: Optional[int]
1243 end_line: Optional[int]
1244 end_column: Optional[int]
1245 end_pos: Optional[int]
1248 @overload
1249 def __new__(
1250 cls,
1251 type: str,
1252 value: Any,
1253 start_pos: Optional[int] = None,
1254 line: Optional[int] = None,
1255 column: Optional[int] = None,
1256 end_line: Optional[int] = None,
1257 end_column: Optional[int] = None,
1258 end_pos: Optional[int] = None
1259 ) -> 'Token':
1260 ...
1262 @overload
1263 def __new__(
1264 cls,
1265 type_: str,
1266 value: Any,
1267 start_pos: Optional[int] = None,
1268 line: Optional[int] = None,
1269 column: Optional[int] = None,
1270 end_line: Optional[int] = None,
1271 end_column: Optional[int] = None,
1272 end_pos: Optional[int] = None
1273 ) -> 'Token': ...
1275 def __new__(cls, *args, **kwargs):
1276 if "type_" in kwargs: 1276 ↛ 1277line 1276 didn't jump to line 1277, because the condition on line 1276 was never true
1277 warnings.warn("`type_` is deprecated use `type` instead", DeprecationWarning)
1279 if "type" in kwargs:
1280 raise TypeError("Error: using both 'type' and the deprecated 'type_' as arguments.")
1281 kwargs["type"] = kwargs.pop("type_")
1283 return cls._future_new(*args, **kwargs)
1286 @classmethod
1287 def _future_new(cls, type, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None):
1288 inst = super(Token, cls).__new__(cls, value)
1290 inst.type = type
1291 inst.start_pos = start_pos
1292 inst.value = value
1293 inst.line = line
1294 inst.column = column
1295 inst.end_line = end_line
1296 inst.end_column = end_column
1297 inst.end_pos = end_pos
1298 return inst
1300 @overload
1301 def update(self, type: Optional[str] = None, value: Optional[Any] = None) -> 'Token':
1302 ...
1304 @overload
1305 def update(self, type_: Optional[str] = None, value: Optional[Any] = None) -> 'Token':
1306 ...
1308 def update(self, *args, **kwargs):
1309 if "type_" in kwargs:
1310 warnings.warn("`type_` is deprecated use `type` instead", DeprecationWarning)
1312 if "type" in kwargs:
1313 raise TypeError("Error: using both 'type' and the deprecated 'type_' as arguments.")
1314 kwargs["type"] = kwargs.pop("type_")
1316 return self._future_update(*args, **kwargs)
1318 def _future_update(self, type: Optional[str] = None, value: Optional[Any] = None) -> 'Token':
1319 return Token.new_borrow_pos(
1320 type if type is not None else self.type,
1321 value if value is not None else self.value,
1322 self
1323 )
1325 @classmethod
1326 def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: 'Token') -> _T:
1327 return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)
1329 def __reduce__(self):
1330 return (self.__class__, (self.type, self.value, self.start_pos, self.line, self.column))
1332 def __repr__(self):
1333 return 'Token(%r, %r)' % (self.type, self.value)
1335 def __deepcopy__(self, memo):
1336 return Token(self.type, self.value, self.start_pos, self.line, self.column)
1338 def __eq__(self, other):
1339 if isinstance(other, Token) and self.type != other.type:  # coverage: branch 1339 ↛ 1340 never taken (condition was never true)
1340 return False
1342 return str.__eq__(self, other)
1344 __hash__ = str.__hash__
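# --- Illustrative example (editor's note, not part of the generated parser) ---
# Token subclasses str, so it compares and hashes like its text value while also
# carrying the terminal type and position info:
#
#   tok = Token('NAME', 'User', start_pos=6, line=1, column=7)
#   tok == 'User'        # True: falls through to str.__eq__
#   tok.type, tok.line   # ('NAME', 1)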
1347class LineCounter:
1348 #--
1350 __slots__ = 'char_pos', 'line', 'column', 'line_start_pos', 'newline_char'
1352 def __init__(self, newline_char):
1353 self.newline_char = newline_char
1354 self.char_pos = 0
1355 self.line = 1
1356 self.column = 1
1357 self.line_start_pos = 0
1359 def __eq__(self, other):
1360 if not isinstance(other, LineCounter):
1361 return NotImplemented
1363 return self.char_pos == other.char_pos and self.newline_char == other.newline_char
1365 def feed(self, token: Token, test_newline=True):
1366 #--
1367 if test_newline:
1368 newlines = token.count(self.newline_char)
1369 if newlines:
1370 self.line += newlines
1371 self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1
1373 self.char_pos += len(token)
1374 self.column = self.char_pos - self.line_start_pos + 1
1377class UnlessCallback:
1378 def __init__(self, scanner):
1379 self.scanner = scanner
1381 def __call__(self, t):
1382 res = self.scanner.match(t.value, 0)
1383 if res:  # coverage: branch 1383 ↛ 1384 never taken (condition was never true)
1384 _value, t.type = res
1385 return t
1388class CallChain:
1389 def __init__(self, callback1, callback2, cond):
1390 self.callback1 = callback1
1391 self.callback2 = callback2
1392 self.cond = cond
1394 def __call__(self, t):
1395 t2 = self.callback1(t)
1396 return self.callback2(t) if self.cond(t2) else t2
1399def _get_match(re_, regexp, s, flags):
1400 m = re_.match(regexp, s, flags)
1401 if m:
1402 return m.group(0)
1404def _create_unless(terminals, g_regex_flags, re_, use_bytes):
1405 tokens_by_type = classify(terminals, lambda t: type(t.pattern))
1406 assert len(tokens_by_type) <= 2, tokens_by_type.keys()
1407 embedded_strs = set()
1408 callback = {}
1409 for retok in tokens_by_type.get(PatternRE, []):
1410 unless = []
1411 for strtok in tokens_by_type.get(PatternStr, []):
1412 if strtok.priority != retok.priority:  # coverage: branch 1412 ↛ 1413 never taken (condition was never true)
1413 continue
1414 s = strtok.pattern.value
1415 if s == _get_match(re_, retok.pattern.to_regexp(), s, g_regex_flags):
1416 unless.append(strtok)
1417 if strtok.pattern.flags <= retok.pattern.flags:  # coverage: branch 1417 ↛ 1411 never taken (condition was never false)
1418 embedded_strs.add(strtok)
1419 if unless:
1420 callback[retok.name] = UnlessCallback(Scanner(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes))
1422 new_terminals = [t for t in terminals if t not in embedded_strs]
1423 return new_terminals, callback
1426class Scanner:
1427 def __init__(self, terminals, g_regex_flags, re_, use_bytes, match_whole=False):
1428 self.terminals = terminals
1429 self.g_regex_flags = g_regex_flags
1430 self.re_ = re_
1431 self.use_bytes = use_bytes
1432 self.match_whole = match_whole
1434 self.allowed_types = {t.name for t in self.terminals}
1436 self._mres = self._build_mres(terminals, len(terminals))
1438 def _build_mres(self, terminals, max_size):
1439 ##
1441 ##
1443 ##
1445 postfix = '$' if self.match_whole else ''
1446 mres = []
1447 while terminals:
1448 pattern = u'|'.join(u'(?P<%s>%s)' % (t.name, t.pattern.to_regexp() + postfix) for t in terminals[:max_size])
1449 if self.use_bytes:  # coverage: branch 1449 ↛ 1450 never taken (condition was never true)
1450 pattern = pattern.encode('latin-1')
1451 try:
1452 mre = self.re_.compile(pattern, self.g_regex_flags)
1453 except AssertionError: ##
1455 return self._build_mres(terminals, max_size // 2)
1457 mres.append(mre)
1458 terminals = terminals[max_size:]
1459 return mres
1461 def match(self, text, pos):
1462 for mre in self._mres:
1463 m = mre.match(text, pos)
1464 if m:
1465 return m.group(0), m.lastgroup
1467 def search(self, text, pos):
1468 best = None, float("inf")
1469 for mre in self._mres:
1470 mre: re.Pattern
1471 m = mre.search(text, pos)
1472 if m:
1473 if m.start() < best[1]:  # coverage: branch 1473 ↛ 1469 never taken (condition was never false)
1474 best = (m.group(0), m.lastgroup), m.start()
1475 if best[0] is None:
1476 return None
1477 else:
1478 return best[1]
1481def _regexp_has_newline(r: str):
1482 #--
1483 return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)
1486class LexerState:
1487 #--
1489 __slots__ = 'text', 'line_ctr', 'last_token'
1491 text: str
1492 line_ctr: LineCounter
1493 last_token: Optional[Token]
1495 def __init__(self, text: str, line_ctr: Optional[LineCounter]=None, last_token: Optional[Token]=None):
1496 self.text = text
1497 self.line_ctr = line_ctr or LineCounter(b'\n' if isinstance(text, bytes) else '\n')
1498 self.last_token = last_token
1500 def __eq__(self, other):
1501 if not isinstance(other, LexerState):
1502 return NotImplemented
1504 return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token
1506 def __copy__(self):
1507 return type(self)(self.text, copy(self.line_ctr), self.last_token)
1510class LexerThread:
1511 #--
1513 def __init__(self, lexer: 'Lexer', lexer_state: LexerState):
1514 self.lexer = lexer
1515 self.state = lexer_state
1517 @classmethod
1518 def from_text(cls, lexer: 'Lexer', text: str) -> 'LexerThread':
1519 return cls(lexer, LexerState(text))
1521 def lex(self, parser_state):
1522 return self.lexer.lex(self.state, parser_state)
1524 def __copy__(self):
1525 return type(self)(self.lexer, copy(self.state))
1527 _Token = Token
1530_Callback = Callable[[Token], Token]
1532class Lexer(ABC):
1533 #--
1534 @abstractmethod
1535 def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
1536 return NotImplemented
1538 def make_lexer_state(self, text):
1539 #--
1540 return LexerState(text)
1543def _check_regex_collisions(terminal_to_regexp: Dict[TerminalDef, str], comparator, strict_mode, max_collisions_to_show=8):
1544 if not comparator:
1545 comparator = interegular.Comparator.from_regexes(terminal_to_regexp)
1547 ##
1549 ##
1551 max_time = 2 if strict_mode else 0.2
1553 ##
1555 if comparator.count_marked_pairs() >= max_collisions_to_show:
1556 return
1557 for group in classify(terminal_to_regexp, lambda t: t.priority).values():
1558 for a, b in comparator.check(group, skip_marked=True):
1559 assert a.priority == b.priority
1560 ##
1562 comparator.mark(a, b)
1564 ##
1566 message = f"Collision between Terminals {a.name} and {b.name}. "
1567 try:
1568 example = comparator.get_example_overlap(a, b, max_time).format_multiline()
1569 except ValueError:
1570 ##
1572 example = "No example could be found fast enough. However, the collision does still exists"
1573 if strict_mode:
1574 raise LexError(f"{message}\n{example}")
1575 logger.warning("%s The lexer will choose between them arbitrarily.\n%s", message, example)
1576 if comparator.count_marked_pairs() >= max_collisions_to_show:
1577 logger.warning("Found 8 regex collisions, will not check for more.")
1578 return
1581class AbstractBasicLexer(Lexer):
1582 terminals_by_name: Dict[str, TerminalDef]
1584 @abstractmethod
1585 def __init__(self, conf: 'LexerConf', comparator=None) -> None:
1586 ...
1588 @abstractmethod
1589 def next_token(self, lex_state: LexerState, parser_state: Any = None) -> Token:
1590 ...
1592 def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]:
1593 with suppress(EOFError):
1594 while True:
1595 yield self.next_token(state, parser_state)
1598class BasicLexer(AbstractBasicLexer):
1599 terminals: Collection[TerminalDef]
1600 ignore_types: FrozenSet[str]
1601 newline_types: FrozenSet[str]
1602 user_callbacks: Dict[str, _Callback]
1603 callback: Dict[str, _Callback]
1604 re: ModuleType
1606 def __init__(self, conf: 'LexerConf', comparator=None) -> None:
1607 terminals = list(conf.terminals)
1608 assert all(isinstance(t, TerminalDef) for t in terminals), terminals
1610 self.re = conf.re_module
1612 if not conf.skip_validation:  # coverage: branch 1612 ↛ 1615 never taken (condition was never true)
1613 ##
1615 terminal_to_regexp = {}
1616 for t in terminals:
1617 regexp = t.pattern.to_regexp()
1618 try:
1619 self.re.compile(regexp, conf.g_regex_flags)
1620 except self.re.error:
1621 raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))
1623 if t.pattern.min_width == 0:
1624 raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern))
1625 if t.pattern.type == "re":
1626 terminal_to_regexp[t] = regexp
1628 if not (set(conf.ignore) <= {t.name for t in terminals}):
1629 raise LexError("Ignore terminals are not defined: %s" % (set(conf.ignore) - {t.name for t in terminals}))
1631 if has_interegular:
1632 _check_regex_collisions(terminal_to_regexp, comparator, conf.strict)
1633 elif conf.strict:
1634 raise LexError("interegular must be installed for strict mode. Use `pip install 'lark[interegular]'`.")
1636 ##
1638 self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp()))
1639 self.ignore_types = frozenset(conf.ignore)
1641 terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
1642 self.terminals = terminals
1643 self.user_callbacks = conf.callbacks
1644 self.g_regex_flags = conf.g_regex_flags
1645 self.use_bytes = conf.use_bytes
1646 self.terminals_by_name = conf.terminals_by_name
1648 self._scanner = None
1650 def _build_scanner(self):
1651 terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes)
1652 assert all(self.callback.values())
1654 for type_, f in self.user_callbacks.items():  # coverage: branch 1654 ↛ 1655 never taken (loop body never started)
1655 if type_ in self.callback:
1656 ##
1658 self.callback[type_] = CallChain(self.callback[type_], f, lambda t: t.type == type_)
1659 else:
1660 self.callback[type_] = f
1662 self._scanner = Scanner(terminals, self.g_regex_flags, self.re, self.use_bytes)
1664 @property
1665 def scanner(self):
1666 if self._scanner is None:
1667 self._build_scanner()
1668 return self._scanner
1670 def match(self, text, pos):
1671 return self.scanner.match(text, pos)
1673 def next_token(self, lex_state: LexerState, parser_state: Any = None) -> Token:
1674 line_ctr = lex_state.line_ctr
1675 while line_ctr.char_pos < len(lex_state.text):
1676 res = self.match(lex_state.text, line_ctr.char_pos)
1677 if not res:
1678 allowed = self.scanner.allowed_types - self.ignore_types
1679 if not allowed:
1680 allowed = {"<END-OF-FILE>"}
1681 raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column,
1682 allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token],
1683 state=parser_state, terminals_by_name=self.terminals_by_name)
1685 value, type_ = res
1687 ignored = type_ in self.ignore_types
1688 t = None
1689 if not ignored or type_ in self.callback:  # coverage: branch 1689 ↛ 1691 never taken (condition was never false)
1690 t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
1691 line_ctr.feed(value, type_ in self.newline_types)
1692 if t is not None:  # coverage: branch 1692 ↛ 1675 never taken (condition was never false)
1693 t.end_line = line_ctr.line
1694 t.end_column = line_ctr.column
1695 t.end_pos = line_ctr.char_pos
1696 if t.type in self.callback:
1697 t = self.callback[t.type](t)
1698 if not ignored:  # coverage: branch 1698 ↛ 1675 never taken (condition was never false)
1699 if not isinstance(t, Token):  # coverage: branch 1699 ↛ 1700 never taken (condition was never true)
1700 raise LexError("Callbacks must return a token (returned %r)" % t)
1701 lex_state.last_token = t
1702 return t
1704 ##
1706 raise EOFError(self)
1709class ContextualLexer(Lexer):
1710 lexers: Dict[int, AbstractBasicLexer]
1711 root_lexer: AbstractBasicLexer
1713 BasicLexer: Type[AbstractBasicLexer] = BasicLexer
1715 def __init__(self, conf: 'LexerConf', states: Dict[int, Collection[str]], always_accept: Collection[str]=()) -> None:
1716 terminals = list(conf.terminals)
1717 terminals_by_name = conf.terminals_by_name
1719 trad_conf = copy(conf)
1720 trad_conf.terminals = terminals
1722 if has_interegular and not conf.skip_validation:  # coverage: branch 1722 ↛ 1723 never taken (condition was never true)
1723 comparator = interegular.Comparator.from_regexes({t: t.pattern.to_regexp() for t in terminals})
1724 else:
1725 comparator = None
1726 lexer_by_tokens: Dict[FrozenSet[str], AbstractBasicLexer] = {}
1727 self.lexers = {}
1728 for state, accepts in states.items():
1729 key = frozenset(accepts)
1730 try:
1731 lexer = lexer_by_tokens[key]
1732 except KeyError:
1733 accepts = set(accepts) | set(conf.ignore) | set(always_accept)
1734 lexer_conf = copy(trad_conf)
1735 lexer_conf.terminals = [terminals_by_name[n] for n in accepts if n in terminals_by_name]
1736 lexer = self.BasicLexer(lexer_conf, comparator)
1737 lexer_by_tokens[key] = lexer
1739 self.lexers[state] = lexer
1741 assert trad_conf.terminals is terminals
1742 trad_conf.skip_validation = True ##
1744 self.root_lexer = self.BasicLexer(trad_conf, comparator)
1746 def lex(self, lexer_state: LexerState, parser_state: 'ParserState') -> Iterator[Token]:
1747 try:
1748 while True:
1749 lexer = self.lexers[parser_state.position]
1750 yield lexer.next_token(lexer_state, parser_state)
1751 except EOFError:
1752 pass
1753 except UnexpectedCharacters as e:
1754 ##
1756 ##
1758 try:
1759 last_token = lexer_state.last_token ##
1761 token = self.root_lexer.next_token(lexer_state, parser_state)
1762 raise UnexpectedToken(token, e.allowed, state=parser_state, token_history=[last_token], terminals_by_name=self.root_lexer.terminals_by_name)
1763 except UnexpectedCharacters:
1764 raise e ##
1769_ParserArgType: 'TypeAlias' = 'Literal["earley", "lalr", "cyk", "auto"]'
1770_LexerArgType: 'TypeAlias' = 'Union[Literal["auto", "basic", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]]'
1771_LexerCallback = Callable[[Token], Token]
1772ParserCallbacks = Dict[str, Callable]
1774class LexerConf(Serialize):
1775 __serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type'
1776 __serialize_namespace__ = TerminalDef,
1778 terminals: Collection[TerminalDef]
1779 re_module: ModuleType
1780 ignore: Collection[str]
1781 postlex: 'Optional[PostLex]'
1782 callbacks: Dict[str, _LexerCallback]
1783 g_regex_flags: int
1784 skip_validation: bool
1785 use_bytes: bool
1786 lexer_type: Optional[_LexerArgType]
1787 strict: bool
1789 def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None,
1790 callbacks: Optional[Dict[str, _LexerCallback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False, strict: bool=False):
1791 self.terminals = terminals
1792 self.terminals_by_name = {t.name: t for t in self.terminals}
1793 assert len(self.terminals) == len(self.terminals_by_name)
1794 self.ignore = ignore
1795 self.postlex = postlex
1796 self.callbacks = callbacks or {}
1797 self.g_regex_flags = g_regex_flags
1798 self.re_module = re_module
1799 self.skip_validation = skip_validation
1800 self.use_bytes = use_bytes
1801 self.strict = strict
1802 self.lexer_type = None
1804 def _deserialize(self):
1805 self.terminals_by_name = {t.name: t for t in self.terminals}
1807 def __deepcopy__(self, memo=None):
1808 return type(self)(
1809 deepcopy(self.terminals, memo),
1810 self.re_module,
1811 deepcopy(self.ignore, memo),
1812 deepcopy(self.postlex, memo),
1813 deepcopy(self.callbacks, memo),
1814 deepcopy(self.g_regex_flags, memo),
1815 deepcopy(self.skip_validation, memo),
1816 deepcopy(self.use_bytes, memo),
1817 )
1819class ParserConf(Serialize):
1820 __serialize_fields__ = 'rules', 'start', 'parser_type'
1822 rules: List['Rule']
1823 callbacks: ParserCallbacks
1824 start: List[str]
1825 parser_type: _ParserArgType
1827 def __init__(self, rules: List['Rule'], callbacks: ParserCallbacks, start: List[str]):
1828 assert isinstance(start, list)
1829 self.rules = rules
1830 self.callbacks = callbacks
1831 self.start = start
1834from functools import partial, wraps
1835from itertools import product
1838class ExpandSingleChild:
1839 def __init__(self, node_builder):
1840 self.node_builder = node_builder
1842 def __call__(self, children):
1843 if len(children) == 1:
1844 return children[0]
1845 else:
1846 return self.node_builder(children)
1850class PropagatePositions:
1851 def __init__(self, node_builder, node_filter=None):
1852 self.node_builder = node_builder
1853 self.node_filter = node_filter
1855 def __call__(self, children):
1856 res = self.node_builder(children)
1858 if isinstance(res, Tree):
1859 ##
1861 ##
1863 ##
1865 ##
1868 res_meta = res.meta
1870 first_meta = self._pp_get_meta(children)
1871 if first_meta is not None:
1872 if not hasattr(res_meta, 'line'):
1873 ##
1875 res_meta.line = getattr(first_meta, 'container_line', first_meta.line)
1876 res_meta.column = getattr(first_meta, 'container_column', first_meta.column)
1877 res_meta.start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos)
1878 res_meta.empty = False
1880 res_meta.container_line = getattr(first_meta, 'container_line', first_meta.line)
1881 res_meta.container_column = getattr(first_meta, 'container_column', first_meta.column)
1882 res_meta.container_start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos)
1884 last_meta = self._pp_get_meta(reversed(children))
1885 if last_meta is not None:
1886 if not hasattr(res_meta, 'end_line'):
1887 res_meta.end_line = getattr(last_meta, 'container_end_line', last_meta.end_line)
1888 res_meta.end_column = getattr(last_meta, 'container_end_column', last_meta.end_column)
1889 res_meta.end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos)
1890 res_meta.empty = False
1892 res_meta.container_end_line = getattr(last_meta, 'container_end_line', last_meta.end_line)
1893 res_meta.container_end_column = getattr(last_meta, 'container_end_column', last_meta.end_column)
1894 res_meta.container_end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos)
1896 return res
1898 def _pp_get_meta(self, children):
1899 for c in children:
1900 if self.node_filter is not None and not self.node_filter(c):
1901 continue
1902 if isinstance(c, Tree):
1903 if not c.meta.empty:
1904 return c.meta
1905 elif isinstance(c, Token):
1906 return c
1907 elif hasattr(c, '__lark_meta__'):
1908 return c.__lark_meta__()
1910def make_propagate_positions(option):
1911 if callable(option):  # coverage: branch 1911 ↛ 1912 never taken (condition was never true)
1912 return partial(PropagatePositions, node_filter=option)
1913 elif option is True:  # coverage: branch 1913 ↛ 1914 never taken (condition was never true)
1914 return PropagatePositions
1915 elif option is False:  # coverage: branch 1915 ↛ 1918 never taken (condition was never false)
1916 return None
1918 raise ConfigurationError('Invalid option for propagate_positions: %r' % option)
1921class ChildFilter:
1922 def __init__(self, to_include, append_none, node_builder):
1923 self.node_builder = node_builder
1924 self.to_include = to_include
1925 self.append_none = append_none
1927 def __call__(self, children):
1928 filtered = []
1930 for i, to_expand, add_none in self.to_include:
1931 if add_none:
1932 filtered += [None] * add_none
1933 if to_expand:
1934 filtered += children[i].children
1935 else:
1936 filtered.append(children[i])
1938 if self.append_none:
1939 filtered += [None] * self.append_none
1941 return self.node_builder(filtered)
1944class ChildFilterLALR(ChildFilter):
1945 #--
1947 def __call__(self, children):
1948 filtered = []
1949 for i, to_expand, add_none in self.to_include:
1950 if add_none:
1951 filtered += [None] * add_none
1952 if to_expand:
1953 if filtered:
1954 filtered += children[i].children
1955 else: ##
1957 filtered = children[i].children
1958 else:
1959 filtered.append(children[i])
1961 if self.append_none:
1962 filtered += [None] * self.append_none
1964 return self.node_builder(filtered)
1967class ChildFilterLALR_NoPlaceholders(ChildFilter):
1968 #--
1969 def __init__(self, to_include, node_builder):
1970 self.node_builder = node_builder
1971 self.to_include = to_include
1973 def __call__(self, children):
1974 filtered = []
1975 for i, to_expand in self.to_include:
1976 if to_expand:  # coverage: branch 1976 ↛ 1977 never taken (condition was never true)
1977 if filtered:
1978 filtered += children[i].children
1979 else: ##
1981 filtered = children[i].children
1982 else:
1983 filtered.append(children[i])
1984 return self.node_builder(filtered)
1987def _should_expand(sym):
1988 return not sym.is_term and sym.name.startswith('_')
1991def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices: List[bool]):
1992 ##
1994 if _empty_indices: 1994 ↛ 1995line 1994 didn't jump to line 1995, because the condition on line 1994 was never true
1995 assert _empty_indices.count(False) == len(expansion)
1996 s = ''.join(str(int(b)) for b in _empty_indices)
1997 empty_indices = [len(ones) for ones in s.split('0')]
1998 assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion))
1999 else:
2000 empty_indices = [0] * (len(expansion)+1)
2002 to_include = []
2003 nones_to_add = 0
2004 for i, sym in enumerate(expansion):
2005 nones_to_add += empty_indices[i]
2006 if keep_all_tokens or not (sym.is_term and sym.filter_out):
2007 to_include.append((i, _should_expand(sym), nones_to_add))
2008 nones_to_add = 0
2010 nones_to_add += empty_indices[len(expansion)]
2012 if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand,_ in to_include): 2012 ↛ exit, 2012 ↛ exit2 missed branches: 1) line 2012 didn't run the generator expression on line 2012, 2) line 2012 didn't return from function 'maybe_create_child_filter', because the condition on line 2012 was never false
2013 if _empty_indices or ambiguous: 2013 ↛ 2014line 2013 didn't jump to line 2014, because the condition on line 2013 was never true
2014 return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add)
2015 else:
2016 ##
2018 return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include])
2021class AmbiguousExpander:
2022 #--
2023 def __init__(self, to_expand, tree_class, node_builder):
2024 self.node_builder = node_builder
2025 self.tree_class = tree_class
2026 self.to_expand = to_expand
2028 def __call__(self, children):
2029 def _is_ambig_tree(t):
2030 return hasattr(t, 'data') and t.data == '_ambig'
2032 ##
2034 ##
2036 ##
2038 ##
2040 ambiguous = []
2041 for i, child in enumerate(children):
2042 if _is_ambig_tree(child):
2043 if i in self.to_expand:
2044 ambiguous.append(i)
2046 child.expand_kids_by_data('_ambig')
2048 if not ambiguous:
2049 return self.node_builder(children)
2051 expand = [child.children if i in ambiguous else (child,) for i, child in enumerate(children)]
2052 return self.tree_class('_ambig', [self.node_builder(list(f)) for f in product(*expand)])
2055def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
2056 to_expand = [i for i, sym in enumerate(expansion)
2057 if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))]
2058 if to_expand:
2059 return partial(AmbiguousExpander, to_expand, tree_class)
2062class AmbiguousIntermediateExpander:
2063 #--
2065 def __init__(self, tree_class, node_builder):
2066 self.node_builder = node_builder
2067 self.tree_class = tree_class
2069 def __call__(self, children):
2070 def _is_iambig_tree(child):
2071 return hasattr(child, 'data') and child.data == '_iambig'
2073 def _collapse_iambig(children):
2074 #--
2076 ##
2078 ##
2080 if children and _is_iambig_tree(children[0]):
2081 iambig_node = children[0]
2082 result = []
2083 for grandchild in iambig_node.children:
2084 collapsed = _collapse_iambig(grandchild.children)
2085 if collapsed:
2086 for child in collapsed:
2087 child.children += children[1:]
2088 result += collapsed
2089 else:
2090 new_tree = self.tree_class('_inter', grandchild.children + children[1:])
2091 result.append(new_tree)
2092 return result
2094 collapsed = _collapse_iambig(children)
2095 if collapsed:
2096 processed_nodes = [self.node_builder(c.children) for c in collapsed]
2097 return self.tree_class('_ambig', processed_nodes)
2099 return self.node_builder(children)
2103def inplace_transformer(func):
2104 @wraps(func)
2105 def f(children):
2106 ##
2108 tree = Tree(func.__name__, children)
2109 return func(tree)
2110 return f
2113def apply_visit_wrapper(func, name, wrapper):
2114 if wrapper is _vargs_meta or wrapper is _vargs_meta_inline:
2115 raise NotImplementedError("Meta args not supported for internal transformer")
2117 @wraps(func)
2118 def f(children):
2119 return wrapper(func, name, children, None)
2120 return f
2123class ParseTreeBuilder:
2124 def __init__(self, rules, tree_class, propagate_positions=False, ambiguous=False, maybe_placeholders=False):
2125 self.tree_class = tree_class
2126 self.propagate_positions = propagate_positions
2127 self.ambiguous = ambiguous
2128 self.maybe_placeholders = maybe_placeholders
2130 self.rule_builders = list(self._init_builders(rules))
2132 def _init_builders(self, rules):
2133 propagate_positions = make_propagate_positions(self.propagate_positions)
2135 for rule in rules:
2136 options = rule.options
2137 keep_all_tokens = options.keep_all_tokens
2138 expand_single_child = options.expand1
2140 wrapper_chain = list(filter(None, [
2141 (expand_single_child and not rule.alias) and ExpandSingleChild,
2142 maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None),
2143 propagate_positions,
2144 self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens),
2145 self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class)
2146 ]))
2148 yield rule, wrapper_chain
2150 def create_callback(self, transformer=None):
2151 callbacks = {}
2153 default_handler = getattr(transformer, '__default__', None)
2154 if default_handler: 2154 ↛ 2155line 2154 didn't jump to line 2155, because the condition on line 2154 was never true
2155 def default_callback(data, children):
2156 return default_handler(data, children, None)
2157 else:
2158 default_callback = self.tree_class
2160 for rule, wrapper_chain in self.rule_builders:
2162 user_callback_name = rule.alias or rule.options.template_source or rule.origin.name
2163 try:
2164 f = getattr(transformer, user_callback_name)
2165 wrapper = getattr(f, 'visit_wrapper', None)
2166 if wrapper is not None:
2167 f = apply_visit_wrapper(f, user_callback_name, wrapper)
2168 elif isinstance(transformer, Transformer_InPlace):
2169 f = inplace_transformer(f)
2170 except AttributeError:
2171 f = partial(default_callback, user_callback_name)
2173 for w in wrapper_chain:
2174 f = w(f)
2176 if rule in callbacks: 2176 ↛ 2177line 2176 didn't jump to line 2177, because the condition on line 2176 was never true
2177 raise GrammarError("Rule '%s' already exists" % (rule,))
2179 callbacks[rule] = f
2181 return callbacks
2185class Action:
2186 def __init__(self, name):
2187 self.name = name
2188 def __str__(self):
2189 return self.name
2190 def __repr__(self):
2191 return str(self)
2193Shift = Action('Shift')
2194Reduce = Action('Reduce')
2196StateT = TypeVar("StateT")
2198class ParseTableBase(Generic[StateT]):
2199 states: Dict[StateT, Dict[str, Tuple]]
2200 start_states: Dict[str, StateT]
2201 end_states: Dict[str, StateT]
2203 def __init__(self, states, start_states, end_states):
2204 self.states = states
2205 self.start_states = start_states
2206 self.end_states = end_states
2208 def serialize(self, memo):
2209 tokens = Enumerator()
2211 states = {
2212 state: {tokens.get(token): ((1, arg.serialize(memo)) if action is Reduce else (0, arg))
2213 for token, (action, arg) in actions.items()}
2214 for state, actions in self.states.items()
2215 }
2217 return {
2218 'tokens': tokens.reversed(),
2219 'states': states,
2220 'start_states': self.start_states,
2221 'end_states': self.end_states,
2222 }
2224 @classmethod
2225 def deserialize(cls, data, memo):
2226 tokens = data['tokens']
2227 states = {
2228 state: {tokens[token]: ((Reduce, Rule.deserialize(arg, memo)) if action==1 else (Shift, arg))
2229 for token, (action, arg) in actions.items()}
2230 for state, actions in data['states'].items()
2231 }
2232 return cls(states, data['start_states'], data['end_states'])
2234class ParseTable(ParseTableBase['State']):
2235 #--
2236 pass
2239class IntParseTable(ParseTableBase[int]):
2240 #--
2242 @classmethod
2243 def from_ParseTable(cls, parse_table: ParseTable):
2244 enum = list(parse_table.states)
2245 state_to_idx: Dict['State', int] = {s:i for i,s in enumerate(enum)}
2246 int_states = {}
2248 for s, la in parse_table.states.items():
2249 la = {k:(v[0], state_to_idx[v[1]]) if v[0] is Shift else v
2250 for k,v in la.items()}
2251 int_states[ state_to_idx[s] ] = la
2254 start_states = {start:state_to_idx[s] for start, s in parse_table.start_states.items()}
2255 end_states = {start:state_to_idx[s] for start, s in parse_table.end_states.items()}
2256 return cls(int_states, start_states, end_states)
2260class ParseConf(Generic[StateT]):
2261 __slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states'
2263 parse_table: ParseTableBase[StateT]
2264 callbacks: ParserCallbacks
2265 start: str
2267 start_state: StateT
2268 end_state: StateT
2269 states: Dict[StateT, Dict[str, tuple]]
2271 def __init__(self, parse_table: ParseTableBase[StateT], callbacks: ParserCallbacks, start: str):
2272 self.parse_table = parse_table
2274 self.start_state = self.parse_table.start_states[start]
2275 self.end_state = self.parse_table.end_states[start]
2276 self.states = self.parse_table.states
2278 self.callbacks = callbacks
2279 self.start = start
2281class ParserState(Generic[StateT]):
2282 __slots__ = 'parse_conf', 'lexer', 'state_stack', 'value_stack'
2284 parse_conf: ParseConf[StateT]
2285 lexer: LexerThread
2286 state_stack: List[StateT]
2287 value_stack: list
2289 def __init__(self, parse_conf: ParseConf[StateT], lexer: LexerThread, state_stack=None, value_stack=None):
2290 self.parse_conf = parse_conf
2291 self.lexer = lexer
2292 self.state_stack = state_stack or [self.parse_conf.start_state]
2293 self.value_stack = value_stack or []
2295 @property
2296 def position(self) -> StateT:
2297 return self.state_stack[-1]
2299 ##
2301 def __eq__(self, other) -> bool:
2302 if not isinstance(other, ParserState):
2303 return NotImplemented
2304 return len(self.state_stack) == len(other.state_stack) and self.position == other.position
2306 def __copy__(self):
2307 return type(self)(
2308 self.parse_conf,
2309 self.lexer, ##
2311 copy(self.state_stack),
2312 deepcopy(self.value_stack),
2313 )
2315 def copy(self) -> 'ParserState[StateT]':
2316 return copy(self)
2318 def feed_token(self, token: Token, is_end=False) -> Any:
2319 state_stack = self.state_stack
2320 value_stack = self.value_stack
2321 states = self.parse_conf.states
2322 end_state = self.parse_conf.end_state
2323 callbacks = self.parse_conf.callbacks
2325 while True:
2326 state = state_stack[-1]
2327 try:
2328 action, arg = states[state][token.type]
2329 except KeyError:
2330 expected = {s for s in states[state].keys() if s.isupper()}
2331 raise UnexpectedToken(token, expected, state=self, interactive_parser=None)
2333 assert arg != end_state
2335 if action is Shift:
2336 ##
2338 assert not is_end
2339 state_stack.append(arg)
2340 value_stack.append(token if token.type not in callbacks else callbacks[token.type](token))
2341 return
2342 else:
2343 ##
2345 rule = arg
2346 size = len(rule.expansion)
2347 if size: 2347 ↛ 2352line 2347 didn't jump to line 2352, because the condition on line 2347 was never false
2348 s = value_stack[-size:]
2349 del state_stack[-size:]
2350 del value_stack[-size:]
2351 else:
2352 s = []
2354 value = callbacks[rule](s) if callbacks else s
2356 _action, new_state = states[state_stack[-1]][rule.origin.name]
2357 assert _action is Shift
2358 state_stack.append(new_state)
2359 value_stack.append(value)
2361 if is_end and state_stack[-1] == end_state: 2361 ↛ 2325line 2361 didn't jump to line 2325, because the condition on line 2361 was never false
2362 return value_stack[-1]
2365class LALR_Parser(Serialize):
2366 def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False):
2367 analysis = LALR_Analyzer(parser_conf, debug=debug, strict=strict)
2368 analysis.compute_lalr()
2369 callbacks = parser_conf.callbacks
2371 self._parse_table = analysis.parse_table
2372 self.parser_conf = parser_conf
2373 self.parser = _Parser(analysis.parse_table, callbacks, debug)
2375 @classmethod
2376 def deserialize(cls, data, memo, callbacks, debug=False):
2377 inst = cls.__new__(cls)
2378 inst._parse_table = IntParseTable.deserialize(data, memo)
2379 inst.parser = _Parser(inst._parse_table, callbacks, debug)
2380 return inst
2382 def serialize(self, memo: Any = None) -> Dict[str, Any]:
2383 return self._parse_table.serialize(memo)
2385 def parse_interactive(self, lexer: LexerThread, start: str):
2386 return self.parser.parse(lexer, start, start_interactive=True)
2388 def parse(self, lexer, start, on_error=None):
2389 try:
2390 return self.parser.parse(lexer, start)
2391 except UnexpectedInput as e:
2392 if on_error is None:
2393 raise
2395 while True:
2396 if isinstance(e, UnexpectedCharacters):
2397 s = e.interactive_parser.lexer_thread.state
2398 p = s.line_ctr.char_pos
2400 if not on_error(e):
2401 raise e
2403 if isinstance(e, UnexpectedCharacters):
2404 ##
2406 if p == s.line_ctr.char_pos:
2407 s.line_ctr.feed(s.text[p:p+1])
2409 try:
2410 return e.interactive_parser.resume_parse()
2411 except UnexpectedToken as e2:
2412 if (isinstance(e, UnexpectedToken)
2413 and e.token.type == e2.token.type == '$END'
2414 and e.interactive_parser == e2.interactive_parser):
2415 ##
2417 raise e2
2418 e = e2
2419 except UnexpectedCharacters as e2:
2420 e = e2
2423class _Parser:
2424 parse_table: ParseTableBase
2425 callbacks: ParserCallbacks
2426 debug: bool
2428 def __init__(self, parse_table: ParseTableBase, callbacks: ParserCallbacks, debug: bool=False):
2429 self.parse_table = parse_table
2430 self.callbacks = callbacks
2431 self.debug = debug
2433 def parse(self, lexer: LexerThread, start: str, value_stack=None, state_stack=None, start_interactive=False):
2434 parse_conf = ParseConf(self.parse_table, self.callbacks, start)
2435 parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
2436 if start_interactive: 2436 ↛ 2438line 2436 didn't jump to line 2438, because the condition on line 2436 was never false
2437 return InteractiveParser(self, parser_state, parser_state.lexer)
2438 return self.parse_from_state(parser_state)
2441 def parse_from_state(self, state: ParserState, last_token: Optional[Token]=None):
2442 #--
2443 try:
2444 token = last_token
2445 for token in state.lexer.lex(state):
2446 assert token is not None
2447 state.feed_token(token)
2449 end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
2450 return state.feed_token(end_token, True)
2451 except UnexpectedInput as e:
2452 try:
2453 e.interactive_parser = InteractiveParser(self, state, state.lexer)
2454 except NameError:
2455 pass
2456 raise e
2457 except Exception as e:
2458 if self.debug:
2459 print("")
2460 print("STATE STACK DUMP")
2461 print("----------------")
2462 for i, s in enumerate(state.state_stack):
2463 print('%d)' % i, s)
2464 print("")
2466 raise
2469class InteractiveParser:
2470 #--
2471 def __init__(self, parser, parser_state, lexer_thread: LexerThread):
2472 self.parser = parser
2473 self.parser_state = parser_state
2474 self.lexer_thread = lexer_thread
2475 self.result = None
2477 @property
2478 def lexer_state(self) -> LexerThread:
2479 warnings.warn("lexer_state will be removed in subsequent releases. Use lexer_thread instead.", DeprecationWarning)
2480 return self.lexer_thread
2482 def feed_token(self, token: Token):
2483 #--
2484 return self.parser_state.feed_token(token, token.type == '$END')
2486 def iter_parse(self) -> Iterator[Token]:
2487 #--
2488 for token in self.lexer_thread.lex(self.parser_state):
2489 yield token
2490 self.result = self.feed_token(token)
2492 def exhaust_lexer(self) -> List[Token]:
2493 #--
2494 return list(self.iter_parse())
2497 def feed_eof(self, last_token=None):
2498 #--
2499 eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else self.lexer_thread._Token('$END', '', 0, 1, 1)
2500 return self.feed_token(eof)
2503 def __copy__(self):
2504 #--
2505 return type(self)(
2506 self.parser,
2507 copy(self.parser_state),
2508 copy(self.lexer_thread),
2509 )
2511 def copy(self):
2512 return copy(self)
2514 def __eq__(self, other):
2515 if not isinstance(other, InteractiveParser):
2516 return False
2518 return self.parser_state == other.parser_state and self.lexer_thread == other.lexer_thread
2520 def as_immutable(self):
2521 #--
2522 p = copy(self)
2523 return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_thread)
2525 def pretty(self):
2526 #--
2527 out = ["Parser choices:"]
2528 for k, v in self.choices().items():
2529 out.append('\t- %s -> %r' % (k, v))
2530 out.append('stack size: %s' % len(self.parser_state.state_stack))
2531 return '\n'.join(out)
2533 def choices(self):
2534 #--
2535 return self.parser_state.parse_conf.parse_table.states[self.parser_state.position]
2537 def accepts(self):
2538 #--
2539 accepts = set()
2540 conf_no_callbacks = copy(self.parser_state.parse_conf)
2541 ##
2543 ##
2545 conf_no_callbacks.callbacks = {}
2546 for t in self.choices():
2547 if t.isupper(): ##
2549 new_cursor = copy(self)
2550 new_cursor.parser_state.parse_conf = conf_no_callbacks
2551 try:
2552 new_cursor.feed_token(self.lexer_thread._Token(t, ''))
2553 except UnexpectedToken:
2554 pass
2555 else:
2556 accepts.add(t)
2557 return accepts
2559 def resume_parse(self):
2560 #--
2561 return self.parser.parse_from_state(self.parser_state, last_token=self.lexer_thread.state.last_token)
2565class ImmutableInteractiveParser(InteractiveParser):
2566 #--
2568 result = None
2570 def __hash__(self):
2571 return hash((self.parser_state, self.lexer_thread))
2573 def feed_token(self, token):
2574 c = copy(self)
2575 c.result = InteractiveParser.feed_token(c, token)
2576 return c
2578 def exhaust_lexer(self):
2579 #--
2580 cursor = self.as_mutable()
2581 cursor.exhaust_lexer()
2582 return cursor.as_immutable()
2584 def as_mutable(self):
2585 #--
2586 p = copy(self)
2587 return InteractiveParser(p.parser, p.parser_state, p.lexer_thread)
2591def _wrap_lexer(lexer_class):
2592 future_interface = getattr(lexer_class, '__future_interface__', False)
2593 if future_interface:
2594 return lexer_class
2595 else:
2596 class CustomLexerWrapper(Lexer):
2597 def __init__(self, lexer_conf):
2598 self.lexer = lexer_class(lexer_conf)
2599 def lex(self, lexer_state, parser_state):
2600 return self.lexer.lex(lexer_state.text)
2601 return CustomLexerWrapper
2604def _deserialize_parsing_frontend(data, memo, lexer_conf, callbacks, options):
2605 parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
2606 cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser
2607 parser = cls.deserialize(data['parser'], memo, callbacks, options.debug)
2608 parser_conf.callbacks = callbacks
2609 return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser)
2612_parser_creators: 'Dict[str, Callable[[LexerConf, Any, Any], Any]]' = {}
2615class ParsingFrontend(Serialize):
2616 __serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser'
2618 lexer_conf: LexerConf
2619 parser_conf: ParserConf
2620 options: Any
2622 def __init__(self, lexer_conf: LexerConf, parser_conf: ParserConf, options, parser=None):
2623 self.parser_conf = parser_conf
2624 self.lexer_conf = lexer_conf
2625 self.options = options
2627 ##
2629 if parser: ## 2629 ↛ 2633line 2629 didn't jump to line 2633, because the condition on line 2629 was never false
2631 self.parser = parser
2632 else:
2633 create_parser = _parser_creators.get(parser_conf.parser_type)
2634 assert create_parser is not None, "{} is not supported in standalone mode".format(
2635 parser_conf.parser_type
2636 )
2637 self.parser = create_parser(lexer_conf, parser_conf, options)
2639 ##
2641 lexer_type = lexer_conf.lexer_type
2642 self.skip_lexer = False
2643 if lexer_type in ('dynamic', 'dynamic_complete'): 2643 ↛ 2644line 2643 didn't jump to line 2644, because the condition on line 2643 was never true
2644 assert lexer_conf.postlex is None
2645 self.skip_lexer = True
2646 return
2648 if isinstance(lexer_type, type): 2648 ↛ 2649line 2648 didn't jump to line 2649, because the condition on line 2648 was never true
2649 assert issubclass(lexer_type, Lexer)
2650 self.lexer = _wrap_lexer(lexer_type)(lexer_conf)
2651 elif isinstance(lexer_type, str): 2651 ↛ 2658line 2651 didn't jump to line 2658, because the condition on line 2651 was never false
2652 create_lexer = {
2653 'basic': create_basic_lexer,
2654 'contextual': create_contextual_lexer,
2655 }[lexer_type]
2656 self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex, options)
2657 else:
2658 raise TypeError(f"Bad value for lexer_type: {lexer_type}")
2660 if lexer_conf.postlex: 2660 ↛ 2661line 2660 didn't jump to line 2661, because the condition on line 2660 was never true
2661 self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex)
2663 def _verify_start(self, start=None):
2664 if start is None: 2664 ↛ 2665line 2664 didn't jump to line 2665, because the condition on line 2664 was never true
2665 start_decls = self.parser_conf.start
2666 if len(start_decls) > 1:
2667 raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls)
2668 start, = start_decls
2669 elif start not in self.parser_conf.start: 2669 ↛ 2670line 2669 didn't jump to line 2670, because the condition on line 2669 was never true
2670 raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start))
2671 return start
2673 def _make_lexer_thread(self, text: str) -> Union[str, LexerThread]:
2674 cls = (self.options and self.options._plugins.get('LexerThread')) or LexerThread
2675 return text if self.skip_lexer else cls.from_text(self.lexer, text)
2677 def parse(self, text: str, start=None, on_error=None):
2678 chosen_start = self._verify_start(start)
2679 kw = {} if on_error is None else {'on_error': on_error}
2680 stream = self._make_lexer_thread(text)
2681 return self.parser.parse(stream, chosen_start, **kw)
2683 def parse_interactive(self, text: Optional[str]=None, start=None):
2684 ##
2686 ##
2688 chosen_start = self._verify_start(start)
2689 if self.parser_conf.parser_type != 'lalr': 2689 ↛ 2690line 2689 didn't jump to line 2690, because the condition on line 2689 was never true
2690 raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ")
2691 stream = self._make_lexer_thread(text) ##
2693 return self.parser.parse_interactive(stream, chosen_start)
2696def _validate_frontend_args(parser, lexer) -> None:
2697 assert_config(parser, ('lalr', 'earley', 'cyk'))
2698 if not isinstance(lexer, type): ## 2698 ↛ exitline 2698 didn't return from function '_validate_frontend_args', because the condition on line 2698 was never false
2700 expected = {
2701 'lalr': ('basic', 'contextual'),
2702 'earley': ('basic', 'dynamic', 'dynamic_complete'),
2703 'cyk': ('basic', ),
2704 }[parser]
2705 assert_config(lexer, expected, 'Parser %r does not support lexer %%r, expected one of %%s' % parser)
2708def _get_lexer_callbacks(transformer, terminals):
2709 result = {}
2710 for terminal in terminals:
2711 callback = getattr(transformer, terminal.name, None)
2712 if callback is not None: 2712 ↛ 2713line 2712 didn't jump to line 2713, because the condition on line 2712 was never true
2713 result[terminal.name] = callback
2714 return result
2716class PostLexConnector:
2717 def __init__(self, lexer, postlexer):
2718 self.lexer = lexer
2719 self.postlexer = postlexer
2721 def lex(self, lexer_state, parser_state):
2722 i = self.lexer.lex(lexer_state, parser_state)
2723 return self.postlexer.process(i)
2727def create_basic_lexer(lexer_conf, parser, postlex, options) -> BasicLexer:
2728 cls = (options and options._plugins.get('BasicLexer')) or BasicLexer
2729 return cls(lexer_conf)
2731def create_contextual_lexer(lexer_conf: LexerConf, parser, postlex, options) -> ContextualLexer:
2732 cls = (options and options._plugins.get('ContextualLexer')) or ContextualLexer
2733 parse_table: ParseTableBase[int] = parser._parse_table
2734 states: Dict[int, Collection[str]] = {idx:list(t.keys()) for idx, t in parse_table.states.items()}
2735 always_accept: Collection[str] = postlex.always_accept if postlex else ()
2736 return cls(lexer_conf, states, always_accept=always_accept)
2738def create_lalr_parser(lexer_conf: LexerConf, parser_conf: ParserConf, options=None) -> LALR_Parser:
2739 debug = options.debug if options else False
2740 strict = options.strict if options else False
2741 cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser
2742 return cls(parser_conf, debug=debug, strict=strict)
2744_parser_creators['lalr'] = create_lalr_parser
2749class PostLex(ABC):
2750 @abstractmethod
2751 def process(self, stream: Iterator[Token]) -> Iterator[Token]:
2752 return stream
2754 always_accept: Iterable[str] = ()
2756class LarkOptions(Serialize):
2757 #--
2759 start: List[str]
2760 debug: bool
2761 strict: bool
2762 transformer: 'Optional[Transformer]'
2763 propagate_positions: Union[bool, str]
2764 maybe_placeholders: bool
2765 cache: Union[bool, str]
2766 regex: bool
2767 g_regex_flags: int
2768 keep_all_tokens: bool
2769 tree_class: Optional[Callable[[str, List], Any]]
2770 parser: _ParserArgType
2771 lexer: _LexerArgType
2772 ambiguity: 'Literal["auto", "resolve", "explicit", "forest"]'
2773 postlex: Optional[PostLex]
2774 priority: 'Optional[Literal["auto", "normal", "invert"]]'
2775 lexer_callbacks: Dict[str, Callable[[Token], Token]]
2776 use_bytes: bool
2777 ordered_sets: bool
2778 edit_terminals: Optional[Callable[[TerminalDef], TerminalDef]]
2779 import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]'
2780 source_path: Optional[str]
2782 OPTIONS_DOC = r"""
2783 **=== General Options ===**
2785 start
2786 The start symbol. Either a string, or a list of strings for multiple possible starts (Default: "start")
2787 debug
2788 Display debug information and extra warnings. Use only when debugging (Default: ``False``)
2789 When used with Earley, it generates a forest graph as "sppf.png", if 'dot' is installed.
2790 strict
2791 Throw an exception on any potential ambiguity, including shift/reduce conflicts, and regex collisions.
2792 transformer
2793 Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
2794 propagate_positions
2795 Propagates positional attributes into the 'meta' attribute of all tree branches.
2796 Sets attributes: (line, column, end_line, end_column, start_pos, end_pos,
2797 container_line, container_column, container_end_line, container_end_column)
2798 Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating.
2799 maybe_placeholders
2800 When ``True``, the ``[]`` operator returns ``None`` when not matched.
2801 When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all.
2802 (default= ``True``)
2803 cache
2804 Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now.
2806 - When ``False``, does nothing (default)
2807 - When ``True``, caches to a temporary file in the local directory
2808 - When given a string, caches to the path pointed by the string
2809 regex
2810 When True, uses the ``regex`` module instead of the stdlib ``re``.
2811 g_regex_flags
2812 Flags that are applied to all terminals (both regex and strings)
2813 keep_all_tokens
2814 Prevent the tree builder from automagically removing "punctuation" tokens (Default: ``False``)
2815 tree_class
2816 Lark will produce trees comprised of instances of this class instead of the default ``lark.Tree``.
2818 **=== Algorithm Options ===**
2820 parser
2821 Decides which parser engine to use. Accepts "earley" or "lalr". (Default: "earley").
2822 (there is also a "cyk" option for legacy)
2823 lexer
2824 Decides whether or not to use a lexer stage
2826 - "auto" (default): Choose for me based on the parser
2827 - "basic": Use a basic lexer
2828 - "contextual": Stronger lexer (only works with parser="lalr")
2829 - "dynamic": Flexible and powerful (only with parser="earley")
2830 - "dynamic_complete": Same as dynamic, but tries *every* variation of tokenizing possible.
2831 ambiguity
2832 Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
2834 - "resolve": The parser will automatically choose the simplest derivation
2835 (it chooses consistently: greedy for tokens, non-greedy for rules)
2836 - "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).
2837 - "forest": The parser will return the root of the shared packed parse forest.
2839 **=== Misc. / Domain Specific Options ===**
2841 postlex
2842 Lexer post-processing (Default: ``None``). Only works with the basic and contextual lexers.
2843 priority
2844 How priorities should be evaluated - "auto", ``None``, "normal", "invert" (Default: "auto")
2845 lexer_callbacks
2846 Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
2847 use_bytes
2848 Accept an input of type ``bytes`` instead of ``str``.
2849 ordered_sets
2850 Should Earley use ordered-sets to achieve stable output (~10% slower than regular sets. Default: True)
2851 edit_terminals
2852 A callback for editing the terminals before parse.
2853 import_paths
2854 A List of either paths or loader functions to specify from where grammars are imported
2855 source_path
2856 Override the source from which the grammar was loaded. Useful for relative imports and unconventional grammar loading
2857 **=== End of Options ===**
2858 """
2859 if __doc__: 2859 ↛ 2860line 2859 didn't jump to line 2860, because the condition on line 2859 was never true
2860 __doc__ += OPTIONS_DOC
2863 ##
2865 ##
2867 ##
2869 ##
2871 ##
2873 ##
2875 _defaults: Dict[str, Any] = {
2876 'debug': False,
2877 'strict': False,
2878 'keep_all_tokens': False,
2879 'tree_class': None,
2880 'cache': False,
2881 'postlex': None,
2882 'parser': 'earley',
2883 'lexer': 'auto',
2884 'transformer': None,
2885 'start': 'start',
2886 'priority': 'auto',
2887 'ambiguity': 'auto',
2888 'regex': False,
2889 'propagate_positions': False,
2890 'lexer_callbacks': {},
2891 'maybe_placeholders': True,
2892 'edit_terminals': None,
2893 'g_regex_flags': 0,
2894 'use_bytes': False,
2895 'ordered_sets': True,
2896 'import_paths': [],
2897 'source_path': None,
2898 '_plugins': {},
2899 }
2901 def __init__(self, options_dict: Dict[str, Any]) -> None:
2902 o = dict(options_dict)
2904 options = {}
2905 for name, default in self._defaults.items():
2906 if name in o: 2906 ↛ 2911line 2906 didn't jump to line 2911, because the condition on line 2906 was never false
2907 value = o.pop(name)
2908 if isinstance(default, bool) and name not in ('cache', 'use_bytes', 'propagate_positions'):
2909 value = bool(value)
2910 else:
2911 value = default
2913 options[name] = value
2915 if isinstance(options['start'], str): 2915 ↛ 2916line 2915 didn't jump to line 2916, because the condition on line 2915 was never true
2916 options['start'] = [options['start']]
2918 self.__dict__['options'] = options
2921 assert_config(self.parser, ('earley', 'lalr', 'cyk', None))
2923 if self.parser == 'earley' and self.transformer: 2923 ↛ 2924line 2923 didn't jump to line 2924, because the condition on line 2923 was never true
2924 raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm. '
2925 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')
2927 if o: 2927 ↛ 2928line 2927 didn't jump to line 2928, because the condition on line 2927 was never true
2928 raise ConfigurationError("Unknown options: %s" % o.keys())
2930 def __getattr__(self, name: str) -> Any:
2931 try:
2932 return self.__dict__['options'][name]
2933 except KeyError as e:
2934 raise AttributeError(e)
2936 def __setattr__(self, name: str, value: Any) -> None:
2937 assert_config(name, self.options.keys(), "%r isn't a valid option. Expected one of: %s")
2938 self.options[name] = value
2940 def serialize(self, memo = None) -> Dict[str, Any]:
2941 return self.options
2943 @classmethod
2944 def deserialize(cls, data: Dict[str, Any], memo: Dict[int, Union[TerminalDef, Rule]]) -> "LarkOptions":
2945 return cls(data)
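# A minimal sketch of how the options machinery above behaves (illustrative only; the
# helper name _lark_options_example is not part of the generated module). LarkOptions
# normalizes a user dict against _defaults, wraps a single start symbol in a list, and
# raises ConfigurationError for unknown keys.
def _lark_options_example():
    opts = LarkOptions({'parser': 'lalr', 'start': 'start'})
    assert opts.parser == 'lalr'              # explicit value kept
    assert opts.start == ['start']            # a plain string is wrapped in a list
    assert opts.maybe_placeholders is True    # filled in from _defaults
    return opts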
2948##
2950##
2952_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'lexer_callbacks', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class', '_plugins'}
2954_VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None)
2955_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest')
2958_T = TypeVar('_T', bound="Lark")
2960class Lark(Serialize):
2961 #--
2963 source_path: str
2964 source_grammar: str
2965 grammar: 'Grammar'
2966 options: LarkOptions
2967 lexer: Lexer
2968 parser: 'ParsingFrontend'
2969 terminals: Collection[TerminalDef]
2971 def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
2972 self.options = LarkOptions(options)
2973 re_module: types.ModuleType
2975 ##
2977 use_regex = self.options.regex
2978 if use_regex:
2979 if _has_regex:
2980 re_module = regex
2981 else:
2982 raise ImportError('`regex` module must be installed if calling `Lark(regex=True)`.')
2983 else:
2984 re_module = re
2986 ##
2988 if self.options.source_path is None:
2989 try:
2990 self.source_path = grammar.name ##
2992 except AttributeError:
2993 self.source_path = '<string>'
2994 else:
2995 self.source_path = self.options.source_path
2997 ##
2999 try:
3000 read = grammar.read ##
3002 except AttributeError:
3003 pass
3004 else:
3005 grammar = read()
3007 cache_fn = None
3008 cache_sha256 = None
3009 if isinstance(grammar, str):
3010 self.source_grammar = grammar
3011 if self.options.use_bytes:
3012 if not isascii(grammar):
3013 raise ConfigurationError("Grammar must be ascii only, when use_bytes=True")
3015 if self.options.cache:
3016 if self.options.parser != 'lalr':
3017 raise ConfigurationError("cache only works with parser='lalr' for now")
3019 unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals', '_plugins')
3020 options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable)
3021 from . import __version__
3022 s = grammar + options_str + __version__ + str(sys.version_info[:2])
3023 cache_sha256 = sha256_digest(s)
3025 if isinstance(self.options.cache, str):
3026 cache_fn = self.options.cache
3027 else:
3028 if self.options.cache is not True:
3029 raise ConfigurationError("cache argument must be bool or str")
3031 try:
3032 username = getpass.getuser()
3033 except Exception:
3034 ##
3036 ##
3038 ##
3040 username = "unknown"
3042 cache_fn = tempfile.gettempdir() + "/.lark_cache_%s_%s_%s_%s.tmp" % (username, cache_sha256, *sys.version_info[:2])
3044 old_options = self.options
3045 try:
3046 with FS.open(cache_fn, 'rb') as f:
3047 logger.debug('Loading grammar from cache: %s', cache_fn)
3048 ##
3050 for name in (set(options) - _LOAD_ALLOWED_OPTIONS):
3051 del options[name]
3052 file_sha256 = f.readline().rstrip(b'\n')
3053 cached_used_files = pickle.load(f)
3054 if file_sha256 == cache_sha256.encode('utf8') and verify_used_files(cached_used_files):
3055 cached_parser_data = pickle.load(f)
3056 self._load(cached_parser_data, **options)
3057 return
3058 except FileNotFoundError:
3059 ##
3061 pass
3062 except Exception: ##
3064 logger.exception("Failed to load Lark from cache: %r. We will try to carry on.", cache_fn)
3066 ##
3068 ##
3070 self.options = old_options
3073 ##
3075 self.grammar, used_files = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens)
3076 else:
3077 assert isinstance(grammar, Grammar)
3078 self.grammar = grammar
3081 if self.options.lexer == 'auto':
3082 if self.options.parser == 'lalr':
3083 self.options.lexer = 'contextual'
3084 elif self.options.parser == 'earley':
3085 if self.options.postlex is not None:
3086 logger.info("postlex can't be used with the dynamic lexer, so we use 'basic' instead. "
3087 "Consider using lalr with contextual instead of earley")
3088 self.options.lexer = 'basic'
3089 else:
3090 self.options.lexer = 'dynamic'
3091 elif self.options.parser == 'cyk':
3092 self.options.lexer = 'basic'
3093 else:
3094 assert False, self.options.parser
3095 lexer = self.options.lexer
3096 if isinstance(lexer, type):
3097 assert issubclass(lexer, Lexer) ##
3099 else:
3100 assert_config(lexer, ('basic', 'contextual', 'dynamic', 'dynamic_complete'))
3101 if self.options.postlex is not None and 'dynamic' in lexer:
3102 raise ConfigurationError("Can't use postlex with a dynamic lexer. Use basic or contextual instead")
3104 if self.options.ambiguity == 'auto':
3105 if self.options.parser == 'earley':
3106 self.options.ambiguity = 'resolve'
3107 else:
3108 assert_config(self.options.parser, ('earley', 'cyk'), "%r doesn't support disambiguation. Use one of these parsers instead: %s")
3110 if self.options.priority == 'auto':
3111 self.options.priority = 'normal'
3113 if self.options.priority not in _VALID_PRIORITY_OPTIONS:
3114 raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
3115 if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
3116 raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
3118 if self.options.parser is None:
3119 terminals_to_keep = '*'
3120 elif self.options.postlex is not None:
3121 terminals_to_keep = set(self.options.postlex.always_accept)
3122 else:
3123 terminals_to_keep = set()
3125 ##
3127 self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start, terminals_to_keep)
3129 if self.options.edit_terminals:
3130 for t in self.terminals:
3131 self.options.edit_terminals(t)
3133 self._terminals_dict = {t.name: t for t in self.terminals}
3135 ##
3137 if self.options.priority == 'invert':
3138 for rule in self.rules:
3139 if rule.options.priority is not None:
3140 rule.options.priority = -rule.options.priority
3141 for term in self.terminals:
3142 term.priority = -term.priority
3143 ##
3145 ##
3147 ##
3149 elif self.options.priority is None:
3150 for rule in self.rules:
3151 if rule.options.priority is not None:
3152 rule.options.priority = None
3153 for term in self.terminals:
3154 term.priority = 0
3156 ##
3158 self.lexer_conf = LexerConf(
3159 self.terminals, re_module, self.ignore_tokens, self.options.postlex,
3160 self.options.lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes, strict=self.options.strict
3161 )
3163 if self.options.parser:
3164 self.parser = self._build_parser()
3165 elif lexer:
3166 self.lexer = self._build_lexer()
3168 if cache_fn:
3169 logger.debug('Saving grammar to cache: %s', cache_fn)
3170 try:
3171 with FS.open(cache_fn, 'wb') as f:
3172 assert cache_sha256 is not None
3173 f.write(cache_sha256.encode('utf8') + b'\n')
3174 pickle.dump(used_files, f)
3175 self.save(f, _LOAD_ALLOWED_OPTIONS)
3176 except IOError as e:
3177 logger.exception("Failed to save Lark to cache: %r.", cache_fn)
3179 if __doc__: 3179 ↛ 3180line 3179 didn't jump to line 3180, because the condition on line 3179 was never true
3180 __doc__ += "\n\n" + LarkOptions.OPTIONS_DOC
3182 __serialize_fields__ = 'parser', 'rules', 'options'
3184 def _build_lexer(self, dont_ignore: bool=False) -> BasicLexer:
3185 lexer_conf = self.lexer_conf
3186 if dont_ignore:
3187 from copy import copy
3188 lexer_conf = copy(lexer_conf)
3189 lexer_conf.ignore = ()
3190 return BasicLexer(lexer_conf)
3192 def _prepare_callbacks(self) -> None:
3193 self._callbacks = {}
3194 ##
3196 if self.options.ambiguity != 'forest': 3196 ↛ 3205line 3196 didn't jump to line 3205, because the condition on line 3196 was never false
3197 self._parse_tree_builder = ParseTreeBuilder(
3198 self.rules,
3199 self.options.tree_class or Tree,
3200 self.options.propagate_positions,
3201 self.options.parser != 'lalr' and self.options.ambiguity == 'explicit',
3202 self.options.maybe_placeholders
3203 )
3204 self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
3205 self._callbacks.update(_get_lexer_callbacks(self.options.transformer, self.terminals))
3207 def _build_parser(self) -> "ParsingFrontend":
3208 self._prepare_callbacks()
3209 _validate_frontend_args(self.options.parser, self.options.lexer)
3210 parser_conf = ParserConf(self.rules, self._callbacks, self.options.start)
3211 return _construct_parsing_frontend(
3212 self.options.parser,
3213 self.options.lexer,
3214 self.lexer_conf,
3215 parser_conf,
3216 options=self.options
3217 )
3219 def save(self, f, exclude_options: Collection[str] = ()) -> None:
3220 #--
3221 if self.options.parser != 'lalr':
3222 raise NotImplementedError("Lark.save() is only implemented for the LALR(1) parser.")
3223 data, m = self.memo_serialize([TerminalDef, Rule])
3224 if exclude_options:
3225 data["options"] = {n: v for n, v in data["options"].items() if n not in exclude_options}
3226 pickle.dump({'data': data, 'memo': m}, f, protocol=pickle.HIGHEST_PROTOCOL)
3228 @classmethod
3229 def load(cls: Type[_T], f) -> _T:
3230 #--
3231 inst = cls.__new__(cls)
3232 return inst._load(f)
3234 def _deserialize_lexer_conf(self, data: Dict[str, Any], memo: Dict[int, Union[TerminalDef, Rule]], options: LarkOptions) -> LexerConf:
3235 lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo)
3236 lexer_conf.callbacks = options.lexer_callbacks or {}
3237 lexer_conf.re_module = regex if options.regex else re
3238 lexer_conf.use_bytes = options.use_bytes
3239 lexer_conf.g_regex_flags = options.g_regex_flags
3240 lexer_conf.skip_validation = True
3241 lexer_conf.postlex = options.postlex
3242 return lexer_conf
3244 def _load(self: _T, f: Any, **kwargs) -> _T:
3245 if isinstance(f, dict): 3245 ↛ 3248line 3245 didn't jump to line 3248, because the condition on line 3245 was never false
3246 d = f
3247 else:
3248 d = pickle.load(f)
3249 memo_json = d['memo']
3250 data = d['data']
3252 assert memo_json
3253 memo = SerializeMemoizer.deserialize(memo_json, {'Rule': Rule, 'TerminalDef': TerminalDef}, {})
3254 options = dict(data['options'])
3255 if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults): 3255 ↛ 3256line 3255 didn't jump to line 3256, because the condition on line 3255 was never true
3256 raise ConfigurationError("Some options are not allowed when loading a Parser: {}"
3257 .format(set(kwargs) - _LOAD_ALLOWED_OPTIONS))
3258 options.update(kwargs)
3259 self.options = LarkOptions.deserialize(options, memo)
3260 self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
3261 self.source_path = '<deserialized>'
3262 _validate_frontend_args(self.options.parser, self.options.lexer)
3263 self.lexer_conf = self._deserialize_lexer_conf(data['parser'], memo, self.options)
3264 self.terminals = self.lexer_conf.terminals
3265 self._prepare_callbacks()
3266 self._terminals_dict = {t.name: t for t in self.terminals}
3267 self.parser = _deserialize_parsing_frontend(
3268 data['parser'],
3269 memo,
3270 self.lexer_conf,
3271 self._callbacks,
3272 self.options, ##
3274 )
3275 return self
3277 @classmethod
3278 def _load_from_dict(cls, data, memo, **kwargs):
3279 inst = cls.__new__(cls)
3280 return inst._load({'data': data, 'memo': memo}, **kwargs)
3282 @classmethod
3283 def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str]=None, **options) -> _T:
3284 #--
3285 if rel_to:
3286 basepath = os.path.dirname(rel_to)
3287 grammar_filename = os.path.join(basepath, grammar_filename)
3288 with open(grammar_filename, encoding='utf8') as f:
3289 return cls(f, **options)
3291 @classmethod
3292 def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: 'Sequence[str]'=[""], **options) -> _T:
3293 #--
3294 package_loader = FromPackageLoader(package, search_paths)
3295 full_path, text = package_loader(None, grammar_path)
3296 options.setdefault('source_path', full_path)
3297 options.setdefault('import_paths', [])
3298 options['import_paths'].append(package_loader)
3299 return cls(text, **options)
3301 def __repr__(self):
3302 return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer)
3305 def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]:
3306 #--
3307 lexer: Lexer
3308 if not hasattr(self, 'lexer') or dont_ignore:
3309 lexer = self._build_lexer(dont_ignore)
3310 else:
3311 lexer = self.lexer
3312 lexer_thread = LexerThread.from_text(lexer, text)
3313 stream = lexer_thread.lex(None)
3314 if self.options.postlex:
3315 return self.options.postlex.process(stream)
3316 return stream
3318 def get_terminal(self, name: str) -> TerminalDef:
3319 #--
3320 return self._terminals_dict[name]
3322 def parse_interactive(self, text: Optional[str]=None, start: Optional[str]=None) -> 'InteractiveParser':
3323 #--
3324 return self.parser.parse_interactive(text, start=start)
3326 def parse(self, text: str, start: Optional[str]=None, on_error: 'Optional[Callable[[UnexpectedInput], bool]]'=None) -> 'ParseTree':
3327 #--
3328 return self.parser.parse(text, start=start, on_error=on_error)
3330 def scan(self, text: str, start: Optional[str]=None) -> Iterator[Tuple[Tuple[int, int], 'ParseTree']]:
3331 #--
3332 if self.options.parser != 'lalr' or self.options.lexer != 'contextual': 3332 ↛ 3333line 3332 didn't jump to line 3333, because the condition on line 3332 was never true
3333 raise ValueError("scan requires parser='lalr' and lexer='contextual'")
3334 start_states = self.parser.parser._parse_table.start_states
3335 if start is None: 3335 ↛ 3339line 3335 didn't jump to line 3339, because the condition on line 3335 was never false
3336 if len(start_states) != 1: 3336 ↛ 3337line 3336 didn't jump to line 3337, because the condition on line 3336 was never true
3337 raise ValueError("Need to specify start")
3338 start, = start_states
3339 start_state = start_states[start]
3340 start_lex: BasicLexer = self.parser.lexer.lexers[start_state]
3341 pos = 0
3342 while True:
3343 start_pos = start_lex.scanner.search(text, pos)
3344 if start_pos is None:
3345 break
3346 valid_end = []
3347 ip = self.parse_interactive(text[start_pos:], start=start)
3348 tokens = ip.lexer_thread.lex(ip.parser_state)
3349 while True:
3350 try:
3351 token = next(tokens)
3352 ip.feed_token(token)
3353 except (UnexpectedInput, StopIteration):
3354 break
3355 if '$END' in ip.choices():
3356 valid_end.append((token, ip.copy()))
3357 for (last, pot) in valid_end[::-1]:
3358 try:
3359 res = pot.feed_eof(last)
3360 except UnexpectedInput:
3361 continue
3362 else:
3363 yield ((start_pos, start_pos + last.end_pos), res)
3364 pos = start_pos + last.end_pos
3365 break
3366 else:
3367 pos = start_pos + 1
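# A minimal usage sketch for Lark.scan above (illustrative only; _scan_example is not
# part of the generated module, and the sample text is an arbitrary assumption).
# scan() yields ((start, end), tree) for every substring the grammar accepts; it
# requires parser='lalr' with lexer='contextual', which is how DATA below was built.
def _scan_example(text: str = 'createdAt DateTime @Python(datetime.datetime)'):
    parser = Lark_StandAlone()   # defined at the bottom of this module
    return [(text[start:end], tree) for (start, end), tree in parser.scan(text)]
    # -> one match: ('@Python(datetime.datetime)',
    #                Tree('start', [Token('ANYTHING_EXCEPT_PAREN', 'datetime.datetime')]))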
3371class DedentError(LarkError):
3372 pass
3374class Indenter(PostLex, ABC):
3375 paren_level: int
3376 indent_level: List[int]
3378 def __init__(self) -> None:
3379 self.paren_level = 0
3380 self.indent_level = [0]
3381 assert self.tab_len > 0
3383 def handle_NL(self, token: Token) -> Iterator[Token]:
3384 if self.paren_level > 0:
3385 return
3387 yield token
3389 indent_str = token.rsplit('\n', 1)[1] ##
3391 indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len
3393 if indent > self.indent_level[-1]:
3394 self.indent_level.append(indent)
3395 yield Token.new_borrow_pos(self.INDENT_type, indent_str, token)
3396 else:
3397 while indent < self.indent_level[-1]:
3398 self.indent_level.pop()
3399 yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token)
3401 if indent != self.indent_level[-1]:
3402 raise DedentError('Unexpected dedent to column %s. Expected dedent to %s' % (indent, self.indent_level[-1]))
3404 def _process(self, stream):
3405 for token in stream:
3406 if token.type == self.NL_type:
3407 yield from self.handle_NL(token)
3408 else:
3409 yield token
3411 if token.type in self.OPEN_PAREN_types:
3412 self.paren_level += 1
3413 elif token.type in self.CLOSE_PAREN_types:
3414 self.paren_level -= 1
3415 assert self.paren_level >= 0
3417 while len(self.indent_level) > 1:
3418 self.indent_level.pop()
3419 yield Token(self.DEDENT_type, '')
3421 assert self.indent_level == [0], self.indent_level
3423 def process(self, stream):
3424 self.paren_level = 0
3425 self.indent_level = [0]
3426 return self._process(stream)
3428 ##
3430 @property
3431 def always_accept(self):
3432 return (self.NL_type,)
3434 @property
3435 @abstractmethod
3436 def NL_type(self) -> str:
3437 raise NotImplementedError()
3439 @property
3440 @abstractmethod
3441 def OPEN_PAREN_types(self) -> List[str]:
3442 raise NotImplementedError()
3444 @property
3445 @abstractmethod
3446 def CLOSE_PAREN_types(self) -> List[str]:
3447 raise NotImplementedError()
3449 @property
3450 @abstractmethod
3451 def INDENT_type(self) -> str:
3452 raise NotImplementedError()
3454 @property
3455 @abstractmethod
3456 def DEDENT_type(self) -> str:
3457 raise NotImplementedError()
3459 @property
3460 @abstractmethod
3461 def tab_len(self) -> int:
3462 raise NotImplementedError()
3465class PythonIndenter(Indenter):
3466 NL_type = '_NEWLINE'
3467 OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
3468 CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']
3469 INDENT_type = '_INDENT'
3470 DEDENT_type = '_DEDENT'
3471 tab_len = 8
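# A small sketch of the Indenter post-lexer above (illustrative only; _indenter_example
# is not part of the generated module, and the token stream is hand-built). PythonIndenter
# watches _NEWLINE tokens and synthesizes _INDENT/_DEDENT tokens from the whitespace that
# follows each newline.
def _indenter_example():
    indenter = PythonIndenter()
    stream = [
        Token('NAME', 'block'),
        Token('_NEWLINE', '\n    '),   # newline followed by a 4-space indent
        Token('NAME', 'body'),
        Token('_NEWLINE', '\n'),       # newline back to column 0
    ]
    return [t.type for t in indenter.process(stream)]
    # -> ['NAME', '_NEWLINE', '_INDENT', 'NAME', '_NEWLINE', '_DEDENT']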
3474import pickle, zlib, base64
3475DATA = (
3476{'parser': {'lexer_conf': {'terminals': [{'@': 0}, {'@': 1}, {'@': 2}], 'ignore': [], 'g_regex_flags': 0, 'use_bytes': False, 'lexer_type': 'contextual', '__type__': 'LexerConf'}, 'parser_conf': {'rules': [{'@': 3}], 'start': ['start'], 'parser_type': 'lalr', '__type__': 'ParserConf'}, 'parser': {'tokens': {0: 'start', 1: '__ANON_0', 2: '$END', 3: 'RPAR', 4: 'ANYTHING_EXCEPT_PAREN'}, 'states': {0: {0: (0, 3), 1: (0, 4)}, 1: {2: (1, {'@': 3})}, 2: {3: (0, 1)}, 3: {}, 4: {4: (0, 2)}}, 'start_states': {'start': 0}, 'end_states': {'start': 3}}, '__type__': 'ParsingFrontend'}, 'rules': [{'@': 3}], 'options': {'debug': False, 'strict': False, 'keep_all_tokens': False, 'tree_class': None, 'cache': False, 'postlex': None, 'parser': 'lalr', 'lexer': 'contextual', 'transformer': None, 'start': ['start'], 'priority': 'normal', 'ambiguity': 'auto', 'regex': False, 'propagate_positions': False, 'lexer_callbacks': {}, 'maybe_placeholders': False, 'edit_terminals': None, 'g_regex_flags': 0, 'use_bytes': False, 'ordered_sets': True, 'import_paths': [], 'source_path': None, '_plugins': {}}, '__type__': 'Lark'}
3477)
3478MEMO = (
3479{0: {'name': 'ANYTHING_EXCEPT_PAREN', 'pattern': {'value': '[^)]+', 'flags': [], 'raw': '/[^)]+/', '_width': [1, 4294967295], '__type__': 'PatternRE'}, 'priority': 0, '__type__': 'TerminalDef'}, 1: {'name': '__ANON_0', 'pattern': {'value': '@Python(', 'flags': [], 'raw': '"@Python("', '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 2: {'name': 'RPAR', 'pattern': {'value': ')', 'flags': [], 'raw': '")"', '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 3: {'origin': {'name': Token('RULE', 'start'), '__type__': 'NonTerminal'}, 'expansion': [{'name': '__ANON_0', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'ANYTHING_EXCEPT_PAREN', 'filter_out': False, '__type__': 'Terminal'}, {'name': 'RPAR', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}}
3480)
3481Shift = 0
3482Reduce = 1
3483def Lark_StandAlone(**kwargs):
3484 return Lark._load_from_dict(DATA, MEMO, **kwargs)
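# A minimal usage sketch (illustrative only; _parse_example is not part of the generated
# module). The embedded DATA/MEMO tables describe a grammar equivalent to
#     start: "@Python(" /[^)]+/ ")"
# and Lark_StandAlone() rebuilds the LALR(1) parser from them with no runtime dependency
# on the lark package.
def _parse_example():
    parser = Lark_StandAlone()
    tree = parser.parse('@Python(datetime.datetime)')
    # "@Python(" and ")" are filter_out terminals, so only the inner text remains
    return tree.children[0]   # Token('ANYTHING_EXCEPT_PAREN', 'datetime.datetime')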