Coverage for src/prisma/_vendor/lark_schema_parser.py: 49%

2127 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-08-27 18:25 +0000

1# The file was automatically generated by Lark v1.1.8 

2__version__ = "1.1.8" 

3 

4# 

5# 

6# Lark Stand-alone Generator Tool 

7# ---------------------------------- 

8# Generates a stand-alone LALR(1) parser 

9# 

10# Git: https://github.com/erezsh/lark 

11# Author: Erez Shinan (erezshin@gmail.com) 

12# 

13# 

14# >>> LICENSE 

15# 

16# This tool and its generated code use a separate license from Lark, 

17# and are subject to the terms of the Mozilla Public License, v. 2.0. 

18# If a copy of the MPL was not distributed with this 

19# file, You can obtain one at https://mozilla.org/MPL/2.0/. 

20# 

21# If you wish to purchase a commercial license for this tool and its 

22# generated code, you may contact me via email or otherwise. 

23# 

24# If MPL2 is incompatible with your free or open-source project, 

25# contact me and we'll work it out. 

26# 

27# 

28 

29from copy import deepcopy 

30from abc import ABC, abstractmethod 

31from types import ModuleType 

32from typing import ( 

33 TypeVar, Generic, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any, 

34 Union, Iterable, IO, TYPE_CHECKING, overload, Sequence, 

35 Pattern as REPattern, ClassVar, Set, Mapping 

36) 

37 

38 

39class LarkError(Exception): 

40 pass 

41 

42 

43class ConfigurationError(LarkError, ValueError): 

44 pass 

45 

46 

47def assert_config(value, options: Collection, msg='Got %r, expected one of %s'): 

48 if value not in options: 48 ↛ 49line 48 didn't jump to line 49, because the condition on line 48 was never true

49 raise ConfigurationError(msg % (value, options)) 

50 

51 

52class GrammarError(LarkError): 

53 pass 

54 

55 

56class ParseError(LarkError): 

57 pass 

58 

59 

60class LexError(LarkError): 

61 pass 

62 

63T = TypeVar('T') 

64 

65class UnexpectedInput(LarkError): 

66 #-- 

67 line: int 

68 column: int 

69 pos_in_stream = None 

70 state: Any 

71 _terminals_by_name = None 

72 interactive_parser: 'InteractiveParser' 

73 

74 def get_context(self, text: str, span: int=40) -> str: 

75 #-- 

76 assert self.pos_in_stream is not None, self 

77 pos = self.pos_in_stream 

78 start = max(pos - span, 0) 

79 end = pos + span 

80 if not isinstance(text, bytes): 80 ↛ 85line 80 didn't jump to line 85, because the condition on line 80 was never false

81 before = text[start:pos].rsplit('\n', 1)[-1] 

82 after = text[pos:end].split('\n', 1)[0] 

83 return before + after + '\n' + ' ' * len(before.expandtabs()) + '^\n' 

84 else: 

85 before = text[start:pos].rsplit(b'\n', 1)[-1] 

86 after = text[pos:end].split(b'\n', 1)[0] 

87 return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace") 

88 

89 def match_examples(self, parse_fn: 'Callable[[str], Tree]', 

90 examples: Union[Mapping[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], 

91 token_type_match_fallback: bool=False, 

92 use_accepts: bool=True 

93 ) -> Optional[T]: 

94 #-- 

95 assert self.state is not None, "Not supported for this exception" 

96 

97 if isinstance(examples, Mapping): 

98 examples = examples.items() 

99 

100 candidate = (None, False) 

101 for i, (label, example) in enumerate(examples): 

102 assert not isinstance(example, str), "Expecting a list" 

103 

104 for j, malformed in enumerate(example): 

105 try: 

106 parse_fn(malformed) 

107 except UnexpectedInput as ut: 

108 if ut.state == self.state: 

109 if ( 

110 use_accepts 

111 and isinstance(self, UnexpectedToken) 

112 and isinstance(ut, UnexpectedToken) 

113 and ut.accepts != self.accepts 

114 ): 

115 logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % 

116 (self.state, self.accepts, ut.accepts, i, j)) 

117 continue 

118 if ( 

119 isinstance(self, (UnexpectedToken, UnexpectedEOF)) 

120 and isinstance(ut, (UnexpectedToken, UnexpectedEOF)) 

121 ): 

122 if ut.token == self.token: ## 

123 

124 logger.debug("Exact Match at example [%s][%s]" % (i, j)) 

125 return label 

126 

127 if token_type_match_fallback: 

128 ## 

129 

130 if (ut.token.type == self.token.type) and not candidate[-1]: 

131 logger.debug("Token Type Fallback at example [%s][%s]" % (i, j)) 

132 candidate = label, True 

133 

134 if candidate[0] is None: 

135 logger.debug("Same State match at example [%s][%s]" % (i, j)) 

136 candidate = label, False 

137 

138 return candidate[0] 

139 

140 def _format_expected(self, expected): 

141 if self._terminals_by_name: 141 ↛ 144line 141 didn't jump to line 144, because the condition on line 141 was never false

142 d = self._terminals_by_name 

143 expected = [d[t_name].user_repr() if t_name in d else t_name for t_name in expected] 

144 return "Expected one of: \n\t* %s\n" % '\n\t* '.join(expected) 

145 

146 

147class UnexpectedEOF(ParseError, UnexpectedInput): 

148 #-- 

149 expected: 'List[Token]' 

150 

151 def __init__(self, expected, state=None, terminals_by_name=None): 

152 super(UnexpectedEOF, self).__init__() 

153 

154 self.expected = expected 

155 self.state = state 

156 from .lexer import Token 

157 self.token = Token("<EOF>", "") ## 

158 

159 self.pos_in_stream = -1 

160 self.line = -1 

161 self.column = -1 

162 self._terminals_by_name = terminals_by_name 

163 

164 

165 def __str__(self): 

166 message = "Unexpected end-of-input. " 

167 message += self._format_expected(self.expected) 

168 return message 

169 

170 

171class UnexpectedCharacters(LexError, UnexpectedInput): 

172 #-- 

173 

174 allowed: Set[str] 

175 considered_tokens: Set[Any] 

176 

177 def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None, 

178 terminals_by_name=None, considered_rules=None): 

179 super(UnexpectedCharacters, self).__init__() 

180 

181 ## 

182 

183 self.line = line 

184 self.column = column 

185 self.pos_in_stream = lex_pos 

186 self.state = state 

187 self._terminals_by_name = terminals_by_name 

188 

189 self.allowed = allowed 

190 self.considered_tokens = considered_tokens 

191 self.considered_rules = considered_rules 

192 self.token_history = token_history 

193 

194 if isinstance(seq, bytes): 194 ↛ 195line 194 didn't jump to line 195, because the condition on line 194 was never true

195 self.char = seq[lex_pos:lex_pos + 1].decode("ascii", "backslashreplace") 

196 else: 

197 self.char = seq[lex_pos] 

198 self._context = self.get_context(seq) 

199 

200 

201 def __str__(self): 

202 message = "No terminal matches '%s' in the current parser context, at line %d col %d" % (self.char, self.line, self.column) 

203 message += '\n\n' + self._context 

204 if self.allowed: 204 ↛ 206line 204 didn't jump to line 206, because the condition on line 204 was never false

205 message += self._format_expected(self.allowed) 

206 if self.token_history: 206 ↛ 208line 206 didn't jump to line 208, because the condition on line 206 was never false

207 message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in self.token_history) 

208 return message 

209 

210 

211class UnexpectedToken(ParseError, UnexpectedInput): 

212 #-- 

213 

214 expected: Set[str] 

215 considered_rules: Set[str] 

216 

217 def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None): 

218 super(UnexpectedToken, self).__init__() 

219 

220 ## 

221 

222 self.line = getattr(token, 'line', '?') 

223 self.column = getattr(token, 'column', '?') 

224 self.pos_in_stream = getattr(token, 'start_pos', None) 

225 self.state = state 

226 

227 self.token = token 

228 self.expected = expected ## 

229 

230 self._accepts = NO_VALUE 

231 self.considered_rules = considered_rules 

232 self.interactive_parser = interactive_parser 

233 self._terminals_by_name = terminals_by_name 

234 self.token_history = token_history 

235 

236 

237 @property 

238 def accepts(self) -> Set[str]: 

239 if self._accepts is NO_VALUE: 239 ↛ 241line 239 didn't jump to line 241, because the condition on line 239 was never false

240 self._accepts = self.interactive_parser and self.interactive_parser.accepts() 

241 return self._accepts 

242 

243 def __str__(self): 

244 message = ("Unexpected token %r at line %s, column %s.\n%s" 

245 % (self.token, self.line, self.column, self._format_expected(self.accepts or self.expected))) 

246 if self.token_history: 246 ↛ 249line 246 didn't jump to line 249, because the condition on line 246 was never false

247 message += "Previous tokens: %r\n" % self.token_history 

248 

249 return message 

250 

251 

252 

253class VisitError(LarkError): 

254 #-- 

255 

256 obj: 'Union[Tree, Token]' 

257 orig_exc: Exception 

258 

259 def __init__(self, rule, obj, orig_exc): 

260 message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc) 

261 super(VisitError, self).__init__(message) 

262 

263 self.rule = rule 

264 self.obj = obj 

265 self.orig_exc = orig_exc 

266 

267 

268class MissingVariableError(LarkError): 

269 pass 

270 

271 

272import sys, re 

273import logging 

274 

275logger: logging.Logger = logging.getLogger("lark") 

276logger.addHandler(logging.StreamHandler()) 

277## 

278 

279## 

280 

281logger.setLevel(logging.CRITICAL) 

282 

283 

284NO_VALUE = object() 

285 

286T = TypeVar("T") 

287 

288 

289def classify(seq: Iterable, key: Optional[Callable] = None, value: Optional[Callable] = None) -> Dict: 

290 d: Dict[Any, Any] = {} 

291 for item in seq: 

292 k = key(item) if (key is not None) else item 

293 v = value(item) if (value is not None) else item 

294 try: 

295 d[k].append(v) 

296 except KeyError: 

297 d[k] = [v] 

298 return d 

299 

300 

301def _deserialize(data: Any, namespace: Dict[str, Any], memo: Dict) -> Any: 

302 if isinstance(data, dict): 

303 if '__type__' in data: ## 

304 

305 class_ = namespace[data['__type__']] 

306 return class_.deserialize(data, memo) 

307 elif '@' in data: 

308 return memo[data['@']] 

309 return {key:_deserialize(value, namespace, memo) for key, value in data.items()} 

310 elif isinstance(data, list): 

311 return [_deserialize(value, namespace, memo) for value in data] 

312 return data 

313 

314 

315_T = TypeVar("_T", bound="Serialize") 

316 

317class Serialize: 

318 #-- 

319 

320 def memo_serialize(self, types_to_memoize: List) -> Any: 

321 memo = SerializeMemoizer(types_to_memoize) 

322 return self.serialize(memo), memo.serialize() 

323 

324 def serialize(self, memo = None) -> Dict[str, Any]: 

325 if memo and memo.in_types(self): 

326 return {'@': memo.memoized.get(self)} 

327 

328 fields = getattr(self, '__serialize_fields__') 

329 res = {f: _serialize(getattr(self, f), memo) for f in fields} 

330 res['__type__'] = type(self).__name__ 

331 if hasattr(self, '_serialize'): 

332 self._serialize(res, memo) ## 

333 

334 return res 

335 

336 @classmethod 

337 def deserialize(cls: Type[_T], data: Dict[str, Any], memo: Dict[int, Any]) -> _T: 

338 namespace = getattr(cls, '__serialize_namespace__', []) 

339 namespace = {c.__name__:c for c in namespace} 

340 

341 fields = getattr(cls, '__serialize_fields__') 

342 

343 if '@' in data: 

344 return memo[data['@']] 

345 

346 inst = cls.__new__(cls) 

347 for f in fields: 

348 try: 

349 setattr(inst, f, _deserialize(data[f], namespace, memo)) 

350 except KeyError as e: 

351 raise KeyError("Cannot find key for class", cls, e) 

352 

353 if hasattr(inst, '_deserialize'): 

354 inst._deserialize() ## 

355 

356 

357 return inst 

358 

359 

360class SerializeMemoizer(Serialize): 

361 #-- 

362 

363 __serialize_fields__ = 'memoized', 

364 

365 def __init__(self, types_to_memoize: List) -> None: 

366 self.types_to_memoize = tuple(types_to_memoize) 

367 self.memoized = Enumerator() 

368 

369 def in_types(self, value: Serialize) -> bool: 

370 return isinstance(value, self.types_to_memoize) 

371 

372 def serialize(self) -> Dict[int, Any]: ## 

373 

374 return _serialize(self.memoized.reversed(), None) 

375 

376 @classmethod 

377 def deserialize(cls, data: Dict[int, Any], namespace: Dict[str, Any], memo: Dict[Any, Any]) -> Dict[int, Any]: ## 

378 

379 return _deserialize(data, namespace, memo) 

380 

381 

382try: 

383 import regex 

384 _has_regex = True 

385except ImportError: 

386 _has_regex = False 

387 

388if sys.version_info >= (3, 11): 

389 import re._parser as sre_parse 

390 import re._constants as sre_constants 

391else: 

392 import sre_parse 

393 import sre_constants 

394 

395categ_pattern = re.compile(r'\\p{[A-Za-z_]+}') 

396 

397def get_regexp_width(expr: str) -> Union[Tuple[int, int], List[int]]: 

398 if _has_regex: 

399 ## 

400 

401 ## 

402 

403 ## 

404 

405 regexp_final = re.sub(categ_pattern, 'A', expr) 

406 else: 

407 if re.search(categ_pattern, expr): 

408 raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr) 

409 regexp_final = expr 

410 try: 

411 ## 

412 

413 return [int(x) for x in sre_parse.parse(regexp_final).getwidth()] ## 

414 

415 except sre_constants.error: 

416 if not _has_regex: 

417 raise ValueError(expr) 

418 else: 

419 ## 

420 

421 ## 

422 

423 c = regex.compile(regexp_final) 

424 if c.match('') is None: 

425 ## 

426 

427 return 1, int(sre_constants.MAXREPEAT) 

428 else: 

429 return 0, int(sre_constants.MAXREPEAT) 

430 

431 

432from collections import OrderedDict 

433 

434class Meta: 

435 

436 empty: bool 

437 line: int 

438 column: int 

439 start_pos: int 

440 end_line: int 

441 end_column: int 

442 end_pos: int 

443 orig_expansion: 'List[TerminalDef]' 

444 match_tree: bool 

445 

446 def __init__(self): 

447 self.empty = True 

448 

449 

450_Leaf_T = TypeVar("_Leaf_T") 

451Branch = Union[_Leaf_T, 'Tree[_Leaf_T]'] 

452 

453 

454class Tree(Generic[_Leaf_T]): 

455 #-- 

456 

457 data: str 

458 children: 'List[Branch[_Leaf_T]]' 

459 

460 def __init__(self, data: str, children: 'List[Branch[_Leaf_T]]', meta: Optional[Meta]=None) -> None: 

461 self.data = data 

462 self.children = children 

463 self._meta = meta 

464 

465 @property 

466 def meta(self) -> Meta: 

467 if self._meta is None: 467 ↛ 469line 467 didn't jump to line 469, because the condition on line 467 was never false

468 self._meta = Meta() 

469 return self._meta 

470 

471 def __repr__(self): 

472 return 'Tree(%r, %r)' % (self.data, self.children) 

473 

474 def _pretty_label(self): 

475 return self.data 

476 

477 def _pretty(self, level, indent_str): 

478 yield f'{indent_str*level}{self._pretty_label()}' 

479 if len(self.children) == 1 and not isinstance(self.children[0], Tree): 

480 yield f'\t{self.children[0]}\n' 

481 else: 

482 yield '\n' 

483 for n in self.children: 

484 if isinstance(n, Tree): 

485 yield from n._pretty(level+1, indent_str) 

486 else: 

487 yield f'{indent_str*(level+1)}{n}\n' 

488 

489 def pretty(self, indent_str: str=' ') -> str: 

490 #-- 

491 return ''.join(self._pretty(0, indent_str)) 

492 

493 def __rich__(self, parent:Optional['rich.tree.Tree']=None) -> 'rich.tree.Tree': 

494 #-- 

495 return self._rich(parent) 

496 

497 def _rich(self, parent): 

498 if parent: 

499 tree = parent.add(f'[bold]{self.data}[/bold]') 

500 else: 

501 import rich.tree 

502 tree = rich.tree.Tree(self.data) 

503 

504 for c in self.children: 

505 if isinstance(c, Tree): 

506 c._rich(tree) 

507 else: 

508 tree.add(f'[green]{c}[/green]') 

509 

510 return tree 

511 

512 def __eq__(self, other): 

513 try: 

514 return self.data == other.data and self.children == other.children 

515 except AttributeError: 

516 return False 

517 

518 def __ne__(self, other): 

519 return not (self == other) 

520 

521 def __hash__(self) -> int: 

522 return hash((self.data, tuple(self.children))) 

523 

524 def iter_subtrees(self) -> 'Iterator[Tree[_Leaf_T]]': 

525 #-- 

526 queue = [self] 

527 subtrees = OrderedDict() 

528 for subtree in queue: 

529 subtrees[id(subtree)] = subtree 

530 ## 

531 

532 queue += [c for c in reversed(subtree.children) ## 

533 

534 if isinstance(c, Tree) and id(c) not in subtrees] 

535 

536 del queue 

537 return reversed(list(subtrees.values())) 

538 

539 def iter_subtrees_topdown(self): 

540 #-- 

541 stack = [self] 

542 stack_append = stack.append 

543 stack_pop = stack.pop 

544 while stack: 

545 node = stack_pop() 

546 if not isinstance(node, Tree): 

547 continue 

548 yield node 

549 for child in reversed(node.children): 

550 stack_append(child) 

551 

552 def find_pred(self, pred: 'Callable[[Tree[_Leaf_T]], bool]') -> 'Iterator[Tree[_Leaf_T]]': 

553 #-- 

554 return filter(pred, self.iter_subtrees()) 

555 

556 def find_data(self, data: str) -> 'Iterator[Tree[_Leaf_T]]': 

557 #-- 

558 return self.find_pred(lambda t: t.data == data) 

559 

560 

561from functools import wraps, update_wrapper 

562from inspect import getmembers, getmro 

563 

564_Return_T = TypeVar('_Return_T') 

565_Return_V = TypeVar('_Return_V') 

566_Leaf_T = TypeVar('_Leaf_T') 

567_Leaf_U = TypeVar('_Leaf_U') 

568_R = TypeVar('_R') 

569_FUNC = Callable[..., _Return_T] 

570_DECORATED = Union[_FUNC, type] 

571 

572class _DiscardType: 

573 #-- 

574 

575 def __repr__(self): 

576 return "lark.visitors.Discard" 

577 

578Discard = _DiscardType() 

579 

580## 

581 

582 

583class _Decoratable: 

584 #-- 

585 

586 @classmethod 

587 def _apply_v_args(cls, visit_wrapper): 

588 mro = getmro(cls) 

589 assert mro[0] is cls 

590 libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)} 

591 for name, value in getmembers(cls): 

592 

593 ## 

594 

595 if name.startswith('_') or (name in libmembers and name not in cls.__dict__): 

596 continue 

597 if not callable(value): 

598 continue 

599 

600 ## 

601 

602 if isinstance(cls.__dict__[name], _VArgsWrapper): 

603 continue 

604 

605 setattr(cls, name, _VArgsWrapper(cls.__dict__[name], visit_wrapper)) 

606 return cls 

607 

608 def __class_getitem__(cls, _): 

609 return cls 

610 

611 

612class Transformer(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]): 

613 #-- 

614 __visit_tokens__ = True ## 

615 

616 

617 def __init__(self, visit_tokens: bool=True) -> None: 

618 self.__visit_tokens__ = visit_tokens 

619 

620 def _call_userfunc(self, tree, new_children=None): 

621 ## 

622 

623 children = new_children if new_children is not None else tree.children 

624 try: 

625 f = getattr(self, tree.data) 

626 except AttributeError: 

627 return self.__default__(tree.data, children, tree.meta) 

628 else: 

629 try: 

630 wrapper = getattr(f, 'visit_wrapper', None) 

631 if wrapper is not None: 631 ↛ 632line 631 didn't jump to line 632, because the condition on line 631 was never true

632 return f.visit_wrapper(f, tree.data, children, tree.meta) 

633 else: 

634 return f(children) 

635 except GrammarError: 

636 raise 

637 except Exception as e: 

638 raise VisitError(tree.data, tree, e) 

639 

640 def _call_userfunc_token(self, token): 

641 try: 

642 f = getattr(self, token.type) 

643 except AttributeError: 

644 return self.__default_token__(token) 

645 else: 

646 try: 

647 return f(token) 

648 except GrammarError: 

649 raise 

650 except Exception as e: 

651 raise VisitError(token.type, token, e) 

652 

653 def _transform_children(self, children): 

654 for c in children: 

655 if isinstance(c, Tree): 

656 res = self._transform_tree(c) 

657 elif self.__visit_tokens__ and isinstance(c, Token): 657 ↛ 660line 657 didn't jump to line 660, because the condition on line 657 was never false

658 res = self._call_userfunc_token(c) 

659 else: 

660 res = c 

661 

662 if res is not Discard: 662 ↛ 654line 662 didn't jump to line 654, because the condition on line 662 was never false

663 yield res 

664 

665 def _transform_tree(self, tree): 

666 children = list(self._transform_children(tree.children)) 

667 return self._call_userfunc(tree, children) 

668 

669 def transform(self, tree: Tree[_Leaf_T]) -> _Return_T: 

670 #-- 

671 return self._transform_tree(tree) 

672 

673 def __mul__( 

674 self: 'Transformer[_Leaf_T, Tree[_Leaf_U]]', 

675 other: 'Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V,]]' 

676 ) -> 'TransformerChain[_Leaf_T, _Return_V]': 

677 #-- 

678 return TransformerChain(self, other) 

679 

680 def __default__(self, data, children, meta): 

681 #-- 

682 return Tree(data, children, meta) 

683 

684 def __default_token__(self, token): 

685 #-- 

686 return token 

687 

688 

689def merge_transformers(base_transformer=None, **transformers_to_merge): 

690 #-- 

691 if base_transformer is None: 

692 base_transformer = Transformer() 

693 for prefix, transformer in transformers_to_merge.items(): 

694 for method_name in dir(transformer): 

695 method = getattr(transformer, method_name) 

696 if not callable(method): 

697 continue 

698 if method_name.startswith("_") or method_name == "transform": 

699 continue 

700 prefixed_method = prefix + "__" + method_name 

701 if hasattr(base_transformer, prefixed_method): 

702 raise AttributeError("Cannot merge: method '%s' appears more than once" % prefixed_method) 

703 

704 setattr(base_transformer, prefixed_method, method) 

705 

706 return base_transformer 

707 

708 

709class InlineTransformer(Transformer): ## 

710 

711 def _call_userfunc(self, tree, new_children=None): 

712 ## 

713 

714 children = new_children if new_children is not None else tree.children 

715 try: 

716 f = getattr(self, tree.data) 

717 except AttributeError: 

718 return self.__default__(tree.data, children, tree.meta) 

719 else: 

720 return f(*children) 

721 

722 

723class TransformerChain(Generic[_Leaf_T, _Return_T]): 

724 

725 transformers: 'Tuple[Union[Transformer, TransformerChain], ...]' 

726 

727 def __init__(self, *transformers: 'Union[Transformer, TransformerChain]') -> None: 

728 self.transformers = transformers 

729 

730 def transform(self, tree: Tree[_Leaf_T]) -> _Return_T: 

731 for t in self.transformers: 

732 tree = t.transform(tree) 

733 return cast(_Return_T, tree) 

734 

735 def __mul__( 

736 self: 'TransformerChain[_Leaf_T, Tree[_Leaf_U]]', 

737 other: 'Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V]]' 

738 ) -> 'TransformerChain[_Leaf_T, _Return_V]': 

739 return TransformerChain(*self.transformers + (other,)) 

740 

741 

742class Transformer_InPlace(Transformer[_Leaf_T, _Return_T]): 

743 #-- 

744 def _transform_tree(self, tree): ## 

745 

746 return self._call_userfunc(tree) 

747 

748 def transform(self, tree: Tree[_Leaf_T]) -> _Return_T: 

749 for subtree in tree.iter_subtrees(): 

750 subtree.children = list(self._transform_children(subtree.children)) 

751 

752 return self._transform_tree(tree) 

753 

754 

755class Transformer_NonRecursive(Transformer[_Leaf_T, _Return_T]): 

756 #-- 

757 

758 def transform(self, tree: Tree[_Leaf_T]) -> _Return_T: 

759 ## 

760 

761 rev_postfix = [] 

762 q: List[Branch[_Leaf_T]] = [tree] 

763 while q: 

764 t = q.pop() 

765 rev_postfix.append(t) 

766 if isinstance(t, Tree): 

767 q += t.children 

768 

769 ## 

770 

771 stack: List = [] 

772 for x in reversed(rev_postfix): 

773 if isinstance(x, Tree): 

774 size = len(x.children) 

775 if size: 

776 args = stack[-size:] 

777 del stack[-size:] 

778 else: 

779 args = [] 

780 

781 res = self._call_userfunc(x, args) 

782 if res is not Discard: 

783 stack.append(res) 

784 

785 elif self.__visit_tokens__ and isinstance(x, Token): 

786 res = self._call_userfunc_token(x) 

787 if res is not Discard: 

788 stack.append(res) 

789 else: 

790 stack.append(x) 

791 

792 result, = stack ## 

793 

794 ## 

795 

796 ## 

797 

798 ## 

799 

800 return cast(_Return_T, result) 

801 

802 

803class Transformer_InPlaceRecursive(Transformer): 

804 #-- 

805 def _transform_tree(self, tree): 

806 tree.children = list(self._transform_children(tree.children)) 

807 return self._call_userfunc(tree) 

808 

809 

810## 

811 

812 

813class VisitorBase: 

814 def _call_userfunc(self, tree): 

815 return getattr(self, tree.data, self.__default__)(tree) 

816 

817 def __default__(self, tree): 

818 #-- 

819 return tree 

820 

821 def __class_getitem__(cls, _): 

822 return cls 

823 

824 

825class Visitor(VisitorBase, ABC, Generic[_Leaf_T]): 

826 #-- 

827 

828 def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]: 

829 #-- 

830 for subtree in tree.iter_subtrees(): 

831 self._call_userfunc(subtree) 

832 return tree 

833 

834 def visit_topdown(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]: 

835 #-- 

836 for subtree in tree.iter_subtrees_topdown(): 

837 self._call_userfunc(subtree) 

838 return tree 

839 

840 

841class Visitor_Recursive(VisitorBase, Generic[_Leaf_T]): 

842 #-- 

843 

844 def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]: 

845 #-- 

846 for child in tree.children: 

847 if isinstance(child, Tree): 

848 self.visit(child) 

849 

850 self._call_userfunc(tree) 

851 return tree 

852 

853 def visit_topdown(self,tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]: 

854 #-- 

855 self._call_userfunc(tree) 

856 

857 for child in tree.children: 

858 if isinstance(child, Tree): 

859 self.visit_topdown(child) 

860 

861 return tree 

862 

863 

864class Interpreter(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]): 

865 #-- 

866 

867 def visit(self, tree: Tree[_Leaf_T]) -> _Return_T: 

868 ## 

869 

870 ## 

871 

872 ## 

873 

874 return self._visit_tree(tree) 

875 

876 def _visit_tree(self, tree: Tree[_Leaf_T]): 

877 f = getattr(self, tree.data) 

878 wrapper = getattr(f, 'visit_wrapper', None) 

879 if wrapper is not None: 

880 return f.visit_wrapper(f, tree.data, tree.children, tree.meta) 

881 else: 

882 return f(tree) 

883 

884 def visit_children(self, tree: Tree[_Leaf_T]) -> List: 

885 return [self._visit_tree(child) if isinstance(child, Tree) else child 

886 for child in tree.children] 

887 

888 def __getattr__(self, name): 

889 return self.__default__ 

890 

891 def __default__(self, tree): 

892 return self.visit_children(tree) 

893 

894 

895_InterMethod = Callable[[Type[Interpreter], _Return_T], _R] 

896 

897def visit_children_decor(func: _InterMethod) -> _InterMethod: 

898 #-- 

899 @wraps(func) 

900 def inner(cls, tree): 

901 values = cls.visit_children(tree) 

902 return func(cls, values) 

903 return inner 

904 

905## 

906 

907 

908def _apply_v_args(obj, visit_wrapper): 

909 try: 

910 _apply = obj._apply_v_args 

911 except AttributeError: 

912 return _VArgsWrapper(obj, visit_wrapper) 

913 else: 

914 return _apply(visit_wrapper) 

915 

916 

917class _VArgsWrapper: 

918 #-- 

919 base_func: Callable 

920 

921 def __init__(self, func: Callable, visit_wrapper: Callable[[Callable, str, list, Any], Any]): 

922 if isinstance(func, _VArgsWrapper): 

923 func = func.base_func 

924 ## 

925 

926 self.base_func = func ## 

927 

928 self.visit_wrapper = visit_wrapper 

929 update_wrapper(self, func) 

930 

931 def __call__(self, *args, **kwargs): 

932 return self.base_func(*args, **kwargs) 

933 

934 def __get__(self, instance, owner=None): 

935 try: 

936 ## 

937 

938 ## 

939 

940 g = type(self.base_func).__get__ 

941 except AttributeError: 

942 return self 

943 else: 

944 return _VArgsWrapper(g(self.base_func, instance, owner), self.visit_wrapper) 

945 

946 def __set_name__(self, owner, name): 

947 try: 

948 f = type(self.base_func).__set_name__ 

949 except AttributeError: 

950 return 

951 else: 

952 f(self.base_func, owner, name) 

953 

954 

955def _vargs_inline(f, _data, children, _meta): 

956 return f(*children) 

957def _vargs_meta_inline(f, _data, children, meta): 

958 return f(meta, *children) 

959def _vargs_meta(f, _data, children, meta): 

960 return f(meta, children) 

961def _vargs_tree(f, data, children, meta): 

962 return f(Tree(data, children, meta)) 

963 

964 

965def v_args(inline: bool = False, meta: bool = False, tree: bool = False, wrapper: Optional[Callable] = None) -> Callable[[_DECORATED], _DECORATED]: 

966 #-- 

967 if tree and (meta or inline): 

968 raise ValueError("Visitor functions cannot combine 'tree' with 'meta' or 'inline'.") 

969 

970 func = None 

971 if meta: 

972 if inline: 

973 func = _vargs_meta_inline 

974 else: 

975 func = _vargs_meta 

976 elif inline: 

977 func = _vargs_inline 

978 elif tree: 

979 func = _vargs_tree 

980 

981 if wrapper is not None: 

982 if func is not None: 

983 raise ValueError("Cannot use 'wrapper' along with 'tree', 'meta' or 'inline'.") 

984 func = wrapper 

985 

986 def _visitor_args_dec(obj): 

987 return _apply_v_args(obj, func) 

988 return _visitor_args_dec 

989 

990 

991 

992TOKEN_DEFAULT_PRIORITY = 0 

993 

994 

995class Symbol(Serialize): 

996 __slots__ = ('name',) 

997 

998 name: str 

999 is_term: ClassVar[bool] = NotImplemented 

1000 

1001 def __init__(self, name: str) -> None: 

1002 self.name = name 

1003 

1004 def __eq__(self, other): 

1005 assert isinstance(other, Symbol), other 

1006 return self.is_term == other.is_term and self.name == other.name 

1007 

1008 def __ne__(self, other): 

1009 return not (self == other) 

1010 

1011 def __hash__(self): 

1012 return hash(self.name) 

1013 

1014 def __repr__(self): 

1015 return '%s(%r)' % (type(self).__name__, self.name) 

1016 

1017 fullrepr = property(__repr__) 

1018 

1019 def renamed(self, f): 

1020 return type(self)(f(self.name)) 

1021 

1022 

1023class Terminal(Symbol): 

1024 __serialize_fields__ = 'name', 'filter_out' 

1025 

1026 is_term: ClassVar[bool] = True 

1027 

1028 def __init__(self, name, filter_out=False): 

1029 self.name = name 

1030 self.filter_out = filter_out 

1031 

1032 @property 

1033 def fullrepr(self): 

1034 return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out) 

1035 

1036 def renamed(self, f): 

1037 return type(self)(f(self.name), self.filter_out) 

1038 

1039 

1040class NonTerminal(Symbol): 

1041 __serialize_fields__ = 'name', 

1042 

1043 is_term: ClassVar[bool] = False 

1044 

1045 

1046class RuleOptions(Serialize): 

1047 __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices' 

1048 

1049 keep_all_tokens: bool 

1050 expand1: bool 

1051 priority: Optional[int] 

1052 template_source: Optional[str] 

1053 empty_indices: Tuple[bool, ...] 

1054 

1055 def __init__(self, keep_all_tokens: bool=False, expand1: bool=False, priority: Optional[int]=None, template_source: Optional[str]=None, empty_indices: Tuple[bool, ...]=()) -> None: 

1056 self.keep_all_tokens = keep_all_tokens 

1057 self.expand1 = expand1 

1058 self.priority = priority 

1059 self.template_source = template_source 

1060 self.empty_indices = empty_indices 

1061 

1062 def __repr__(self): 

1063 return 'RuleOptions(%r, %r, %r, %r)' % ( 

1064 self.keep_all_tokens, 

1065 self.expand1, 

1066 self.priority, 

1067 self.template_source 

1068 ) 

1069 

1070 

1071class Rule(Serialize): 

1072 #-- 

1073 __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash') 

1074 

1075 __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options' 

1076 __serialize_namespace__ = Terminal, NonTerminal, RuleOptions 

1077 

1078 origin: NonTerminal 

1079 expansion: Sequence[Symbol] 

1080 order: int 

1081 alias: Optional[str] 

1082 options: RuleOptions 

1083 _hash: int 

1084 

1085 def __init__(self, origin: NonTerminal, expansion: Sequence[Symbol], 

1086 order: int=0, alias: Optional[str]=None, options: Optional[RuleOptions]=None): 

1087 self.origin = origin 

1088 self.expansion = expansion 

1089 self.alias = alias 

1090 self.order = order 

1091 self.options = options or RuleOptions() 

1092 self._hash = hash((self.origin, tuple(self.expansion))) 

1093 

1094 def _deserialize(self): 

1095 self._hash = hash((self.origin, tuple(self.expansion))) 

1096 

1097 def __str__(self): 

1098 return '<%s : %s>' % (self.origin.name, ' '.join(x.name for x in self.expansion)) 

1099 

1100 def __repr__(self): 

1101 return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options) 

1102 

1103 def __hash__(self): 

1104 return self._hash 

1105 

1106 def __eq__(self, other): 

1107 if not isinstance(other, Rule): 1107 ↛ 1109line 1107 didn't jump to line 1109, because the condition on line 1107 was never false

1108 return False 

1109 return self.origin == other.origin and self.expansion == other.expansion 

1110 

1111 

1112 

1113from copy import copy 

1114 

1115try: ## 

1116 

1117 has_interegular = bool(interegular) 

1118except NameError: 

1119 has_interegular = False 

1120 

1121class Pattern(Serialize, ABC): 

1122 #-- 

1123 

1124 value: str 

1125 flags: Collection[str] 

1126 raw: Optional[str] 

1127 type: ClassVar[str] 

1128 

1129 def __init__(self, value: str, flags: Collection[str] = (), raw: Optional[str] = None) -> None: 

1130 self.value = value 

1131 self.flags = frozenset(flags) 

1132 self.raw = raw 

1133 

1134 def __repr__(self): 

1135 return repr(self.to_regexp()) 

1136 

1137 ## 

1138 

1139 def __hash__(self): 

1140 return hash((type(self), self.value, self.flags)) 

1141 

1142 def __eq__(self, other): 

1143 return type(self) == type(other) and self.value == other.value and self.flags == other.flags 

1144 

1145 @abstractmethod 

1146 def to_regexp(self) -> str: 

1147 raise NotImplementedError() 

1148 

1149 @property 

1150 @abstractmethod 

1151 def min_width(self) -> int: 

1152 raise NotImplementedError() 

1153 

1154 @property 

1155 @abstractmethod 

1156 def max_width(self) -> int: 

1157 raise NotImplementedError() 

1158 

1159 def _get_flags(self, value): 

1160 for f in self.flags: 1160 ↛ 1161line 1160 didn't jump to line 1161, because the loop on line 1160 never started

1161 value = ('(?%s:%s)' % (f, value)) 

1162 return value 

1163 

1164 

1165class PatternStr(Pattern): 

1166 __serialize_fields__ = 'value', 'flags', 'raw' 

1167 

1168 type: ClassVar[str] = "str" 

1169 

1170 def to_regexp(self) -> str: 

1171 return self._get_flags(re.escape(self.value)) 

1172 

1173 @property 

1174 def min_width(self) -> int: 

1175 return len(self.value) 

1176 

1177 @property 

1178 def max_width(self) -> int: 

1179 return len(self.value) 

1180 

1181 

1182class PatternRE(Pattern): 

1183 __serialize_fields__ = 'value', 'flags', 'raw', '_width' 

1184 

1185 type: ClassVar[str] = "re" 

1186 

1187 def to_regexp(self) -> str: 

1188 return self._get_flags(self.value) 

1189 

1190 _width = None 

1191 def _get_width(self): 

1192 if self._width is None: 1192 ↛ 1193line 1192 didn't jump to line 1193, because the condition on line 1192 was never true

1193 self._width = get_regexp_width(self.to_regexp()) 

1194 return self._width 

1195 

1196 @property 

1197 def min_width(self) -> int: 

1198 return self._get_width()[0] 

1199 

1200 @property 

1201 def max_width(self) -> int: 

1202 return self._get_width()[1] 

1203 

1204 

1205class TerminalDef(Serialize): 

1206 #-- 

1207 __serialize_fields__ = 'name', 'pattern', 'priority' 

1208 __serialize_namespace__ = PatternStr, PatternRE 

1209 

1210 name: str 

1211 pattern: Pattern 

1212 priority: int 

1213 

1214 def __init__(self, name: str, pattern: Pattern, priority: int = TOKEN_DEFAULT_PRIORITY) -> None: 

1215 assert isinstance(pattern, Pattern), pattern 

1216 self.name = name 

1217 self.pattern = pattern 

1218 self.priority = priority 

1219 

1220 def __repr__(self): 

1221 return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern) 

1222 

1223 def user_repr(self) -> str: 

1224 if self.name.startswith('__'): ## 1224 ↛ 1226line 1224 didn't jump to line 1226, because the condition on line 1224 was never true

1225 

1226 return self.pattern.raw or self.name 

1227 else: 

1228 return self.name 

1229 

1230_T = TypeVar('_T', bound="Token") 

1231 

1232class Token(str): 

1233 #-- 

1234 __slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos') 

1235 

1236 __match_args__ = ('type', 'value') 

1237 

1238 type: str 

1239 start_pos: Optional[int] 

1240 value: Any 

1241 line: Optional[int] 

1242 column: Optional[int] 

1243 end_line: Optional[int] 

1244 end_column: Optional[int] 

1245 end_pos: Optional[int] 

1246 

1247 

1248 @overload 

1249 def __new__( 

1250 cls, 

1251 type: str, 

1252 value: Any, 

1253 start_pos: Optional[int] = None, 

1254 line: Optional[int] = None, 

1255 column: Optional[int] = None, 

1256 end_line: Optional[int] = None, 

1257 end_column: Optional[int] = None, 

1258 end_pos: Optional[int] = None 

1259 ) -> 'Token': 

1260 ... 

1261 

1262 @overload 

1263 def __new__( 

1264 cls, 

1265 type_: str, 

1266 value: Any, 

1267 start_pos: Optional[int] = None, 

1268 line: Optional[int] = None, 

1269 column: Optional[int] = None, 

1270 end_line: Optional[int] = None, 

1271 end_column: Optional[int] = None, 

1272 end_pos: Optional[int] = None 

1273 ) -> 'Token': ... 

1274 

1275 def __new__(cls, *args, **kwargs): 

1276 if "type_" in kwargs: 1276 ↛ 1277line 1276 didn't jump to line 1277, because the condition on line 1276 was never true

1277 warnings.warn("`type_` is deprecated use `type` instead", DeprecationWarning) 

1278 

1279 if "type" in kwargs: 

1280 raise TypeError("Error: using both 'type' and the deprecated 'type_' as arguments.") 

1281 kwargs["type"] = kwargs.pop("type_") 

1282 

1283 return cls._future_new(*args, **kwargs) 

1284 

1285 

1286 @classmethod 

1287 def _future_new(cls, type, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None): 

1288 inst = super(Token, cls).__new__(cls, value) 

1289 

1290 inst.type = type 

1291 inst.start_pos = start_pos 

1292 inst.value = value 

1293 inst.line = line 

1294 inst.column = column 

1295 inst.end_line = end_line 

1296 inst.end_column = end_column 

1297 inst.end_pos = end_pos 

1298 return inst 

1299 

1300 @overload 

1301 def update(self, type: Optional[str] = None, value: Optional[Any] = None) -> 'Token': 

1302 ... 

1303 

1304 @overload 

1305 def update(self, type_: Optional[str] = None, value: Optional[Any] = None) -> 'Token': 

1306 ... 

1307 

1308 def update(self, *args, **kwargs): 

1309 if "type_" in kwargs: 

1310 warnings.warn("`type_` is deprecated use `type` instead", DeprecationWarning) 

1311 

1312 if "type" in kwargs: 

1313 raise TypeError("Error: using both 'type' and the deprecated 'type_' as arguments.") 

1314 kwargs["type"] = kwargs.pop("type_") 

1315 

1316 return self._future_update(*args, **kwargs) 

1317 

1318 def _future_update(self, type: Optional[str] = None, value: Optional[Any] = None) -> 'Token': 

1319 return Token.new_borrow_pos( 

1320 type if type is not None else self.type, 

1321 value if value is not None else self.value, 

1322 self 

1323 ) 

1324 

1325 @classmethod 

1326 def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: 'Token') -> _T: 

1327 return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos) 

1328 

1329 def __reduce__(self): 

1330 return (self.__class__, (self.type, self.value, self.start_pos, self.line, self.column)) 

1331 

1332 def __repr__(self): 

1333 return 'Token(%r, %r)' % (self.type, self.value) 

1334 

1335 def __deepcopy__(self, memo): 

1336 return Token(self.type, self.value, self.start_pos, self.line, self.column) 

1337 

1338 def __eq__(self, other): 

1339 if isinstance(other, Token) and self.type != other.type: 1339 ↛ 1340line 1339 didn't jump to line 1340, because the condition on line 1339 was never true

1340 return False 

1341 

1342 return str.__eq__(self, other) 

1343 

1344 __hash__ = str.__hash__ 

1345 

1346 

1347class LineCounter: 

1348 #-- 

1349 

1350 __slots__ = 'char_pos', 'line', 'column', 'line_start_pos', 'newline_char' 

1351 

1352 def __init__(self, newline_char): 

1353 self.newline_char = newline_char 

1354 self.char_pos = 0 

1355 self.line = 1 

1356 self.column = 1 

1357 self.line_start_pos = 0 

1358 

1359 def __eq__(self, other): 

1360 if not isinstance(other, LineCounter): 

1361 return NotImplemented 

1362 

1363 return self.char_pos == other.char_pos and self.newline_char == other.newline_char 

1364 

1365 def feed(self, token: Token, test_newline=True): 

1366 #-- 

1367 if test_newline: 

1368 newlines = token.count(self.newline_char) 

1369 if newlines: 1369 ↛ 1370line 1369 didn't jump to line 1370, because the condition on line 1369 was never true

1370 self.line += newlines 

1371 self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1 

1372 

1373 self.char_pos += len(token) 

1374 self.column = self.char_pos - self.line_start_pos + 1 

1375 

1376 

1377class UnlessCallback: 

1378 def __init__(self, scanner): 

1379 self.scanner = scanner 

1380 

1381 def __call__(self, t): 

1382 res = self.scanner.match(t.value, 0) 

1383 if res: 1383 ↛ 1384line 1383 didn't jump to line 1384, because the condition on line 1383 was never true

1384 _value, t.type = res 

1385 return t 

1386 

1387 

1388class CallChain: 

1389 def __init__(self, callback1, callback2, cond): 

1390 self.callback1 = callback1 

1391 self.callback2 = callback2 

1392 self.cond = cond 

1393 

1394 def __call__(self, t): 

1395 t2 = self.callback1(t) 

1396 return self.callback2(t) if self.cond(t2) else t2 

1397 

1398 

1399def _get_match(re_, regexp, s, flags): 

1400 m = re_.match(regexp, s, flags) 

1401 if m: 

1402 return m.group(0) 

1403 

1404def _create_unless(terminals, g_regex_flags, re_, use_bytes): 

1405 tokens_by_type = classify(terminals, lambda t: type(t.pattern)) 

1406 assert len(tokens_by_type) <= 2, tokens_by_type.keys() 

1407 embedded_strs = set() 

1408 callback = {} 

1409 for retok in tokens_by_type.get(PatternRE, []): 

1410 unless = [] 

1411 for strtok in tokens_by_type.get(PatternStr, []): 

1412 if strtok.priority != retok.priority: 1412 ↛ 1413line 1412 didn't jump to line 1413, because the condition on line 1412 was never true

1413 continue 

1414 s = strtok.pattern.value 

1415 if s == _get_match(re_, retok.pattern.to_regexp(), s, g_regex_flags): 

1416 unless.append(strtok) 

1417 if strtok.pattern.flags <= retok.pattern.flags: 1417 ↛ 1411line 1417 didn't jump to line 1411, because the condition on line 1417 was never false

1418 embedded_strs.add(strtok) 

1419 if unless: 

1420 callback[retok.name] = UnlessCallback(Scanner(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes)) 

1421 

1422 new_terminals = [t for t in terminals if t not in embedded_strs] 

1423 return new_terminals, callback 

1424 

1425 

1426class Scanner: 

1427 def __init__(self, terminals, g_regex_flags, re_, use_bytes, match_whole=False): 

1428 self.terminals = terminals 

1429 self.g_regex_flags = g_regex_flags 

1430 self.re_ = re_ 

1431 self.use_bytes = use_bytes 

1432 self.match_whole = match_whole 

1433 

1434 self.allowed_types = {t.name for t in self.terminals} 

1435 

1436 self._mres = self._build_mres(terminals, len(terminals)) 

1437 

1438 def _build_mres(self, terminals, max_size): 

1439 ## 

1440 

1441 ## 

1442 

1443 ## 

1444 

1445 postfix = '$' if self.match_whole else '' 

1446 mres = [] 

1447 while terminals: 

1448 pattern = u'|'.join(u'(?P<%s>%s)' % (t.name, t.pattern.to_regexp() + postfix) for t in terminals[:max_size]) 

1449 if self.use_bytes: 1449 ↛ 1450line 1449 didn't jump to line 1450, because the condition on line 1449 was never true

1450 pattern = pattern.encode('latin-1') 

1451 try: 

1452 mre = self.re_.compile(pattern, self.g_regex_flags) 

1453 except AssertionError: ## 

1454 

1455 return self._build_mres(terminals, max_size // 2) 

1456 

1457 mres.append(mre) 

1458 terminals = terminals[max_size:] 

1459 return mres 

1460 

1461 def match(self, text, pos): 

1462 for mre in self._mres: 

1463 m = mre.match(text, pos) 

1464 if m: 

1465 return m.group(0), m.lastgroup 

1466 

1467 def search(self, text, pos): 

1468 best = None, float("inf") 

1469 for mre in self._mres: 

1470 mre: re.Pattern 

1471 m = mre.search(text, pos) 

1472 if m: 

1473 if m.start() < best[1]: 

1474 best = (m.group(0), m.lastgroup), m.start() 

1475 if best[0] is None: 

1476 return None 

1477 else: 

1478 return best[1] 

1479 

1480 

1481def _regexp_has_newline(r: str): 

1482 #-- 

1483 return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r) 

1484 

1485 

1486class LexerState: 

1487 #-- 

1488 

1489 __slots__ = 'text', 'line_ctr', 'last_token' 

1490 

1491 text: str 

1492 line_ctr: LineCounter 

1493 last_token: Optional[Token] 

1494 

1495 def __init__(self, text: str, line_ctr: Optional[LineCounter]=None, last_token: Optional[Token]=None): 

1496 self.text = text 

1497 self.line_ctr = line_ctr or LineCounter(b'\n' if isinstance(text, bytes) else '\n') 

1498 self.last_token = last_token 

1499 

1500 def __eq__(self, other): 

1501 if not isinstance(other, LexerState): 

1502 return NotImplemented 

1503 

1504 return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token 

1505 

1506 def __copy__(self): 

1507 return type(self)(self.text, copy(self.line_ctr), self.last_token) 

1508 

1509 

1510class LexerThread: 

1511 #-- 

1512 

1513 def __init__(self, lexer: 'Lexer', lexer_state: LexerState): 

1514 self.lexer = lexer 

1515 self.state = lexer_state 

1516 

1517 @classmethod 

1518 def from_text(cls, lexer: 'Lexer', text: str) -> 'LexerThread': 

1519 return cls(lexer, LexerState(text)) 

1520 

1521 def lex(self, parser_state): 

1522 return self.lexer.lex(self.state, parser_state) 

1523 

1524 def __copy__(self): 

1525 return type(self)(self.lexer, copy(self.state)) 

1526 

1527 _Token = Token 

1528 

1529 

1530_Callback = Callable[[Token], Token] 

1531 

1532class Lexer(ABC): 

1533 #-- 

1534 @abstractmethod 

1535 def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]: 

1536 return NotImplemented 

1537 

1538 def make_lexer_state(self, text): 

1539 #-- 

1540 return LexerState(text) 

1541 

1542 

1543def _check_regex_collisions(terminal_to_regexp: Dict[TerminalDef, str], comparator, strict_mode, max_collisions_to_show=8): 

1544 if not comparator: 

1545 comparator = interegular.Comparator.from_regexes(terminal_to_regexp) 

1546 

1547 ## 

1548 

1549 ## 

1550 

1551 max_time = 2 if strict_mode else 0.2 

1552 

1553 ## 

1554 

1555 if comparator.count_marked_pairs() >= max_collisions_to_show: 

1556 return 

1557 for group in classify(terminal_to_regexp, lambda t: t.priority).values(): 

1558 for a, b in comparator.check(group, skip_marked=True): 

1559 assert a.priority == b.priority 

1560 ## 

1561 

1562 comparator.mark(a, b) 

1563 

1564 ## 

1565 

1566 message = f"Collision between Terminals {a.name} and {b.name}. " 

1567 try: 

1568 example = comparator.get_example_overlap(a, b, max_time).format_multiline() 

1569 except ValueError: 

1570 ## 

1571 

1572 example = "No example could be found fast enough. However, the collision does still exists" 

1573 if strict_mode: 

1574 raise LexError(f"{message}\n{example}") 

1575 logger.warning("%s The lexer will choose between them arbitrarily.\n%s", message, example) 

1576 if comparator.count_marked_pairs() >= max_collisions_to_show: 

1577 logger.warning("Found 8 regex collisions, will not check for more.") 

1578 return 

1579 

1580 

1581class AbstractBasicLexer(Lexer): 

1582 terminals_by_name: Dict[str, TerminalDef] 

1583 

1584 @abstractmethod 

1585 def __init__(self, conf: 'LexerConf', comparator=None) -> None: 

1586 ... 

1587 

1588 @abstractmethod 

1589 def next_token(self, lex_state: LexerState, parser_state: Any = None) -> Token: 

1590 ... 

1591 

1592 def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]: 

1593 with suppress(EOFError): 

1594 while True: 

1595 yield self.next_token(state, parser_state) 

1596 

1597 

1598class BasicLexer(AbstractBasicLexer): 

1599 terminals: Collection[TerminalDef] 

1600 ignore_types: FrozenSet[str] 

1601 newline_types: FrozenSet[str] 

1602 user_callbacks: Dict[str, _Callback] 

1603 callback: Dict[str, _Callback] 

1604 re: ModuleType 

1605 

1606 def __init__(self, conf: 'LexerConf', comparator=None) -> None: 

1607 terminals = list(conf.terminals) 

1608 assert all(isinstance(t, TerminalDef) for t in terminals), terminals 

1609 

1610 self.re = conf.re_module 

1611 

1612 if not conf.skip_validation: 1612 ↛ 1615line 1612 didn't jump to line 1615, because the condition on line 1612 was never true

1613 ## 

1614 

1615 terminal_to_regexp = {} 

1616 for t in terminals: 

1617 regexp = t.pattern.to_regexp() 

1618 try: 

1619 self.re.compile(regexp, conf.g_regex_flags) 

1620 except self.re.error: 

1621 raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern)) 

1622 

1623 if t.pattern.min_width == 0: 

1624 raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern)) 

1625 if t.pattern.type == "re": 

1626 terminal_to_regexp[t] = regexp 

1627 

1628 if not (set(conf.ignore) <= {t.name for t in terminals}): 

1629 raise LexError("Ignore terminals are not defined: %s" % (set(conf.ignore) - {t.name for t in terminals})) 

1630 

1631 if has_interegular: 

1632 _check_regex_collisions(terminal_to_regexp, comparator, conf.strict) 

1633 elif conf.strict: 

1634 raise LexError("interegular must be installed for strict mode. Use `pip install 'lark[interegular]'`.") 

1635 

1636 ## 

1637 

1638 self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())) 

1639 self.ignore_types = frozenset(conf.ignore) 

1640 

1641 terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name)) 

1642 self.terminals = terminals 

1643 self.user_callbacks = conf.callbacks 

1644 self.g_regex_flags = conf.g_regex_flags 

1645 self.use_bytes = conf.use_bytes 

1646 self.terminals_by_name = conf.terminals_by_name 

1647 

1648 self._scanner = None 

1649 

1650 def _build_scanner(self): 

1651 terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes) 

1652 assert all(self.callback.values()) 

1653 

1654 for type_, f in self.user_callbacks.items(): 1654 ↛ 1655line 1654 didn't jump to line 1655, because the loop on line 1654 never started

1655 if type_ in self.callback: 

1656 ## 

1657 

1658 self.callback[type_] = CallChain(self.callback[type_], f, lambda t: t.type == type_) 

1659 else: 

1660 self.callback[type_] = f 

1661 

1662 self._scanner = Scanner(terminals, self.g_regex_flags, self.re, self.use_bytes) 

1663 

1664 @property 

1665 def scanner(self): 

1666 if self._scanner is None: 

1667 self._build_scanner() 

1668 return self._scanner 

1669 

1670 def match(self, text, pos): 

1671 return self.scanner.match(text, pos) 

1672 

1673 def next_token(self, lex_state: LexerState, parser_state: Any = None) -> Token: 

1674 line_ctr = lex_state.line_ctr 

1675 while line_ctr.char_pos < len(lex_state.text): 

1676 res = self.match(lex_state.text, line_ctr.char_pos) 

1677 if not res: 

1678 allowed = self.scanner.allowed_types - self.ignore_types 

1679 if not allowed: 1679 ↛ 1680line 1679 didn't jump to line 1680, because the condition on line 1679 was never true

1680 allowed = {"<END-OF-FILE>"} 

1681 raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column, 

1682 allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token], 

1683 state=parser_state, terminals_by_name=self.terminals_by_name) 

1684 

1685 value, type_ = res 

1686 

1687 ignored = type_ in self.ignore_types 

1688 t = None 

1689 if not ignored or type_ in self.callback: 

1690 t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) 

1691 line_ctr.feed(value, type_ in self.newline_types) 

1692 if t is not None: 

1693 t.end_line = line_ctr.line 

1694 t.end_column = line_ctr.column 

1695 t.end_pos = line_ctr.char_pos 

1696 if t.type in self.callback: 

1697 t = self.callback[t.type](t) 

1698 if not ignored: 1698 ↛ 1675line 1698 didn't jump to line 1675, because the condition on line 1698 was never false

1699 if not isinstance(t, Token): 1699 ↛ 1700line 1699 didn't jump to line 1700, because the condition on line 1699 was never true

1700 raise LexError("Callbacks must return a token (returned %r)" % t) 

1701 lex_state.last_token = t 

1702 return t 

1703 

1704 ## 

1705 

1706 raise EOFError(self) 

1707 

1708 

1709class ContextualLexer(Lexer): 

1710 lexers: Dict[int, AbstractBasicLexer] 

1711 root_lexer: AbstractBasicLexer 

1712 

1713 BasicLexer: Type[AbstractBasicLexer] = BasicLexer 

1714 

1715 def __init__(self, conf: 'LexerConf', states: Dict[int, Collection[str]], always_accept: Collection[str]=()) -> None: 

1716 terminals = list(conf.terminals) 

1717 terminals_by_name = conf.terminals_by_name 

1718 

1719 trad_conf = copy(conf) 

1720 trad_conf.terminals = terminals 

1721 

1722 if has_interegular and not conf.skip_validation: 1722 ↛ 1723line 1722 didn't jump to line 1723, because the condition on line 1722 was never true

1723 comparator = interegular.Comparator.from_regexes({t: t.pattern.to_regexp() for t in terminals}) 

1724 else: 

1725 comparator = None 

1726 lexer_by_tokens: Dict[FrozenSet[str], AbstractBasicLexer] = {} 

1727 self.lexers = {} 

1728 for state, accepts in states.items(): 

1729 key = frozenset(accepts) 

1730 try: 

1731 lexer = lexer_by_tokens[key] 

1732 except KeyError: 

1733 accepts = set(accepts) | set(conf.ignore) | set(always_accept) 

1734 lexer_conf = copy(trad_conf) 

1735 lexer_conf.terminals = [terminals_by_name[n] for n in accepts if n in terminals_by_name] 

1736 lexer = self.BasicLexer(lexer_conf, comparator) 

1737 lexer_by_tokens[key] = lexer 

1738 

1739 self.lexers[state] = lexer 

1740 

1741 assert trad_conf.terminals is terminals 

1742 trad_conf.skip_validation = True ## 

1743 

1744 self.root_lexer = self.BasicLexer(trad_conf, comparator) 

1745 

1746 def lex(self, lexer_state: LexerState, parser_state: 'ParserState') -> Iterator[Token]: 

1747 try: 

1748 while True: 

1749 lexer = self.lexers[parser_state.position] 

1750 yield lexer.next_token(lexer_state, parser_state) 

1751 except EOFError: 

1752 pass 

1753 except UnexpectedCharacters as e: 

1754 ## 

1755 

1756 ## 

1757 

1758 try: 

1759 last_token = lexer_state.last_token ## 

1760 

1761 token = self.root_lexer.next_token(lexer_state, parser_state) 

1762 raise UnexpectedToken(token, e.allowed, state=parser_state, token_history=[last_token], terminals_by_name=self.root_lexer.terminals_by_name) 

1763 except UnexpectedCharacters: 

1764 raise e ## 

1765 

1766 

1767 

1768 

1769_ParserArgType: 'TypeAlias' = 'Literal["earley", "lalr", "cyk", "auto"]' 

1770_LexerArgType: 'TypeAlias' = 'Union[Literal["auto", "basic", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]]' 

1771_LexerCallback = Callable[[Token], Token] 

1772ParserCallbacks = Dict[str, Callable] 

1773 

1774class LexerConf(Serialize): 

1775 __serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type' 

1776 __serialize_namespace__ = TerminalDef, 

1777 

1778 terminals: Collection[TerminalDef] 

1779 re_module: ModuleType 

1780 ignore: Collection[str] 

1781 postlex: 'Optional[PostLex]' 

1782 callbacks: Dict[str, _LexerCallback] 

1783 g_regex_flags: int 

1784 skip_validation: bool 

1785 use_bytes: bool 

1786 lexer_type: Optional[_LexerArgType] 

1787 strict: bool 

1788 

1789 def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, 

1790 callbacks: Optional[Dict[str, _LexerCallback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False, strict: bool=False): 

1791 self.terminals = terminals 

1792 self.terminals_by_name = {t.name: t for t in self.terminals} 

1793 assert len(self.terminals) == len(self.terminals_by_name) 

1794 self.ignore = ignore 

1795 self.postlex = postlex 

1796 self.callbacks = callbacks or {} 

1797 self.g_regex_flags = g_regex_flags 

1798 self.re_module = re_module 

1799 self.skip_validation = skip_validation 

1800 self.use_bytes = use_bytes 

1801 self.strict = strict 

1802 self.lexer_type = None 

1803 

1804 def _deserialize(self): 

1805 self.terminals_by_name = {t.name: t for t in self.terminals} 

1806 

1807 def __deepcopy__(self, memo=None): 

1808 return type(self)( 

1809 deepcopy(self.terminals, memo), 

1810 self.re_module, 

1811 deepcopy(self.ignore, memo), 

1812 deepcopy(self.postlex, memo), 

1813 deepcopy(self.callbacks, memo), 

1814 deepcopy(self.g_regex_flags, memo), 

1815 deepcopy(self.skip_validation, memo), 

1816 deepcopy(self.use_bytes, memo), 

1817 ) 

1818 

1819class ParserConf(Serialize): 

1820 __serialize_fields__ = 'rules', 'start', 'parser_type' 

1821 

1822 rules: List['Rule'] 

1823 callbacks: ParserCallbacks 

1824 start: List[str] 

1825 parser_type: _ParserArgType 

1826 

1827 def __init__(self, rules: List['Rule'], callbacks: ParserCallbacks, start: List[str]): 

1828 assert isinstance(start, list) 

1829 self.rules = rules 

1830 self.callbacks = callbacks 

1831 self.start = start 

1832 

1833 

1834from functools import partial, wraps 

1835from itertools import product 

1836 

1837 

1838class ExpandSingleChild: 

1839 def __init__(self, node_builder): 

1840 self.node_builder = node_builder 

1841 

1842 def __call__(self, children): 

1843 if len(children) == 1: 1843 ↛ 1844line 1843 didn't jump to line 1844, because the condition on line 1843 was never true

1844 return children[0] 

1845 else: 

1846 return self.node_builder(children) 

1847 

1848 

1849 

1850class PropagatePositions: 

1851 def __init__(self, node_builder, node_filter=None): 

1852 self.node_builder = node_builder 

1853 self.node_filter = node_filter 

1854 

1855 def __call__(self, children): 

1856 res = self.node_builder(children) 

1857 

1858 if isinstance(res, Tree): 

1859 ## 

1860 

1861 ## 

1862 

1863 ## 

1864 

1865 ## 

1866 

1867 

1868 res_meta = res.meta 

1869 

1870 first_meta = self._pp_get_meta(children) 

1871 if first_meta is not None: 

1872 if not hasattr(res_meta, 'line'): 

1873 ## 

1874 

1875 res_meta.line = getattr(first_meta, 'container_line', first_meta.line) 

1876 res_meta.column = getattr(first_meta, 'container_column', first_meta.column) 

1877 res_meta.start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos) 

1878 res_meta.empty = False 

1879 

1880 res_meta.container_line = getattr(first_meta, 'container_line', first_meta.line) 

1881 res_meta.container_column = getattr(first_meta, 'container_column', first_meta.column) 

1882 res_meta.container_start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos) 

1883 

1884 last_meta = self._pp_get_meta(reversed(children)) 

1885 if last_meta is not None: 

1886 if not hasattr(res_meta, 'end_line'): 

1887 res_meta.end_line = getattr(last_meta, 'container_end_line', last_meta.end_line) 

1888 res_meta.end_column = getattr(last_meta, 'container_end_column', last_meta.end_column) 

1889 res_meta.end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos) 

1890 res_meta.empty = False 

1891 

1892 res_meta.container_end_line = getattr(last_meta, 'container_end_line', last_meta.end_line) 

1893 res_meta.container_end_column = getattr(last_meta, 'container_end_column', last_meta.end_column) 

1894 res_meta.container_end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos) 

1895 

1896 return res 

1897 

1898 def _pp_get_meta(self, children): 

1899 for c in children: 

1900 if self.node_filter is not None and not self.node_filter(c): 

1901 continue 

1902 if isinstance(c, Tree): 

1903 if not c.meta.empty: 

1904 return c.meta 

1905 elif isinstance(c, Token): 

1906 return c 

1907 elif hasattr(c, '__lark_meta__'): 

1908 return c.__lark_meta__() 

1909 

1910def make_propagate_positions(option): 

1911 if callable(option): 1911 ↛ 1912line 1911 didn't jump to line 1912, because the condition on line 1911 was never true

1912 return partial(PropagatePositions, node_filter=option) 

1913 elif option is True: 1913 ↛ 1914line 1913 didn't jump to line 1914, because the condition on line 1913 was never true

1914 return PropagatePositions 

1915 elif option is False: 1915 ↛ 1918line 1915 didn't jump to line 1918, because the condition on line 1915 was never false

1916 return None 

1917 

1918 raise ConfigurationError('Invalid option for propagate_positions: %r' % option) 

1919 

1920 

1921class ChildFilter: 

1922 def __init__(self, to_include, append_none, node_builder): 

1923 self.node_builder = node_builder 

1924 self.to_include = to_include 

1925 self.append_none = append_none 

1926 

1927 def __call__(self, children): 

1928 filtered = [] 

1929 

1930 for i, to_expand, add_none in self.to_include: 

1931 if add_none: 

1932 filtered += [None] * add_none 

1933 if to_expand: 

1934 filtered += children[i].children 

1935 else: 

1936 filtered.append(children[i]) 

1937 

1938 if self.append_none: 

1939 filtered += [None] * self.append_none 

1940 

1941 return self.node_builder(filtered) 

1942 

1943 

1944class ChildFilterLALR(ChildFilter): 

1945 #-- 

1946 

1947 def __call__(self, children): 

1948 filtered = [] 

1949 for i, to_expand, add_none in self.to_include: 

1950 if add_none: 

1951 filtered += [None] * add_none 

1952 if to_expand: 

1953 if filtered: 

1954 filtered += children[i].children 

1955 else: ## 

1956 

1957 filtered = children[i].children 

1958 else: 

1959 filtered.append(children[i]) 

1960 

1961 if self.append_none: 

1962 filtered += [None] * self.append_none 

1963 

1964 return self.node_builder(filtered) 

1965 

1966 

1967class ChildFilterLALR_NoPlaceholders(ChildFilter): 

1968 #-- 

1969 def __init__(self, to_include, node_builder): 

1970 self.node_builder = node_builder 

1971 self.to_include = to_include 

1972 

1973 def __call__(self, children): 

1974 filtered = [] 

1975 for i, to_expand in self.to_include: 

1976 if to_expand: 

1977 if filtered: 1977 ↛ 1981line 1977 didn't jump to line 1981, because the condition on line 1977 was never false

1978 filtered += children[i].children 

1979 else: ## 

1980 

1981 filtered = children[i].children 

1982 else: 

1983 filtered.append(children[i]) 

1984 return self.node_builder(filtered) 

1985 

1986 

1987def _should_expand(sym): 

1988 return not sym.is_term and sym.name.startswith('_') 

1989 

1990 

1991def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices: List[bool]): 

1992 ## 

1993 

1994 if _empty_indices: 1994 ↛ 1995line 1994 didn't jump to line 1995, because the condition on line 1994 was never true

1995 assert _empty_indices.count(False) == len(expansion) 

1996 s = ''.join(str(int(b)) for b in _empty_indices) 

1997 empty_indices = [len(ones) for ones in s.split('0')] 

1998 assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion)) 

1999 else: 

2000 empty_indices = [0] * (len(expansion)+1) 

2001 

2002 to_include = [] 

2003 nones_to_add = 0 

2004 for i, sym in enumerate(expansion): 

2005 nones_to_add += empty_indices[i] 

2006 if keep_all_tokens or not (sym.is_term and sym.filter_out): 

2007 to_include.append((i, _should_expand(sym), nones_to_add)) 

2008 nones_to_add = 0 

2009 

2010 nones_to_add += empty_indices[len(expansion)] 

2011 

2012 if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand,_ in to_include): 

2013 if _empty_indices or ambiguous: 2013 ↛ 2014line 2013 didn't jump to line 2014, because the condition on line 2013 was never true

2014 return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add) 

2015 else: 

2016 ## 

2017 

2018 return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include]) 

2019 

2020 

2021class AmbiguousExpander: 

2022 #-- 

2023 def __init__(self, to_expand, tree_class, node_builder): 

2024 self.node_builder = node_builder 

2025 self.tree_class = tree_class 

2026 self.to_expand = to_expand 

2027 

2028 def __call__(self, children): 

2029 def _is_ambig_tree(t): 

2030 return hasattr(t, 'data') and t.data == '_ambig' 

2031 

2032 ## 

2033 

2034 ## 

2035 

2036 ## 

2037 

2038 ## 

2039 

2040 ambiguous = [] 

2041 for i, child in enumerate(children): 

2042 if _is_ambig_tree(child): 

2043 if i in self.to_expand: 

2044 ambiguous.append(i) 

2045 

2046 child.expand_kids_by_data('_ambig') 

2047 

2048 if not ambiguous: 

2049 return self.node_builder(children) 

2050 

2051 expand = [child.children if i in ambiguous else (child,) for i, child in enumerate(children)] 

2052 return self.tree_class('_ambig', [self.node_builder(list(f)) for f in product(*expand)]) 

2053 

2054 

2055def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens): 

2056 to_expand = [i for i, sym in enumerate(expansion) 

2057 if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))] 

2058 if to_expand: 

2059 return partial(AmbiguousExpander, to_expand, tree_class) 

2060 

2061 

2062class AmbiguousIntermediateExpander: 

2063 #-- 

2064 

2065 def __init__(self, tree_class, node_builder): 

2066 self.node_builder = node_builder 

2067 self.tree_class = tree_class 

2068 

2069 def __call__(self, children): 

2070 def _is_iambig_tree(child): 

2071 return hasattr(child, 'data') and child.data == '_iambig' 

2072 

2073 def _collapse_iambig(children): 

2074 #-- 

2075 

2076 ## 

2077 

2078 ## 

2079 

2080 if children and _is_iambig_tree(children[0]): 

2081 iambig_node = children[0] 

2082 result = [] 

2083 for grandchild in iambig_node.children: 

2084 collapsed = _collapse_iambig(grandchild.children) 

2085 if collapsed: 

2086 for child in collapsed: 

2087 child.children += children[1:] 

2088 result += collapsed 

2089 else: 

2090 new_tree = self.tree_class('_inter', grandchild.children + children[1:]) 

2091 result.append(new_tree) 

2092 return result 

2093 

2094 collapsed = _collapse_iambig(children) 

2095 if collapsed: 

2096 processed_nodes = [self.node_builder(c.children) for c in collapsed] 

2097 return self.tree_class('_ambig', processed_nodes) 

2098 

2099 return self.node_builder(children) 

2100 

2101 

2102 

2103def inplace_transformer(func): 

2104 @wraps(func) 

2105 def f(children): 

2106 ## 

2107 

2108 tree = Tree(func.__name__, children) 

2109 return func(tree) 

2110 return f 

2111 

2112 

2113def apply_visit_wrapper(func, name, wrapper): 

2114 if wrapper is _vargs_meta or wrapper is _vargs_meta_inline: 

2115 raise NotImplementedError("Meta args not supported for internal transformer") 

2116 

2117 @wraps(func) 

2118 def f(children): 

2119 return wrapper(func, name, children, None) 

2120 return f 

2121 

2122 

2123class ParseTreeBuilder: 

2124 def __init__(self, rules, tree_class, propagate_positions=False, ambiguous=False, maybe_placeholders=False): 

2125 self.tree_class = tree_class 

2126 self.propagate_positions = propagate_positions 

2127 self.ambiguous = ambiguous 

2128 self.maybe_placeholders = maybe_placeholders 

2129 

2130 self.rule_builders = list(self._init_builders(rules)) 

2131 

2132 def _init_builders(self, rules): 

2133 propagate_positions = make_propagate_positions(self.propagate_positions) 

2134 

2135 for rule in rules: 

2136 options = rule.options 

2137 keep_all_tokens = options.keep_all_tokens 

2138 expand_single_child = options.expand1 

2139 

2140 wrapper_chain = list(filter(None, [ 

2141 (expand_single_child and not rule.alias) and ExpandSingleChild, 

2142 maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None), 

2143 propagate_positions, 

2144 self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), 

2145 self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class) 

2146 ])) 

2147 

2148 yield rule, wrapper_chain 

2149 

2150 def create_callback(self, transformer=None): 

2151 callbacks = {} 

2152 

2153 default_handler = getattr(transformer, '__default__', None) 

2154 if default_handler: 2154 ↛ 2155line 2154 didn't jump to line 2155, because the condition on line 2154 was never true

2155 def default_callback(data, children): 

2156 return default_handler(data, children, None) 

2157 else: 

2158 default_callback = self.tree_class 

2159 

2160 for rule, wrapper_chain in self.rule_builders: 

2161 

2162 user_callback_name = rule.alias or rule.options.template_source or rule.origin.name 

2163 try: 

2164 f = getattr(transformer, user_callback_name) 

2165 wrapper = getattr(f, 'visit_wrapper', None) 

2166 if wrapper is not None: 

2167 f = apply_visit_wrapper(f, user_callback_name, wrapper) 

2168 elif isinstance(transformer, Transformer_InPlace): 

2169 f = inplace_transformer(f) 

2170 except AttributeError: 

2171 f = partial(default_callback, user_callback_name) 

2172 

2173 for w in wrapper_chain: 

2174 f = w(f) 

2175 

2176 if rule in callbacks: 2176 ↛ 2177line 2176 didn't jump to line 2177, because the condition on line 2176 was never true

2177 raise GrammarError("Rule '%s' already exists" % (rule,)) 

2178 

2179 callbacks[rule] = f 

2180 

2181 return callbacks 

2182 

2183 

2184 

2185class Action: 

2186 def __init__(self, name): 

2187 self.name = name 

2188 def __str__(self): 

2189 return self.name 

2190 def __repr__(self): 

2191 return str(self) 

2192 

2193Shift = Action('Shift') 

2194Reduce = Action('Reduce') 

2195 

2196StateT = TypeVar("StateT") 

2197 

2198class ParseTableBase(Generic[StateT]): 

2199 states: Dict[StateT, Dict[str, Tuple]] 

2200 start_states: Dict[str, StateT] 

2201 end_states: Dict[str, StateT] 

2202 

2203 def __init__(self, states, start_states, end_states): 

2204 self.states = states 

2205 self.start_states = start_states 

2206 self.end_states = end_states 

2207 

2208 def serialize(self, memo): 

2209 tokens = Enumerator() 

2210 

2211 states = { 

2212 state: {tokens.get(token): ((1, arg.serialize(memo)) if action is Reduce else (0, arg)) 

2213 for token, (action, arg) in actions.items()} 

2214 for state, actions in self.states.items() 

2215 } 

2216 

2217 return { 

2218 'tokens': tokens.reversed(), 

2219 'states': states, 

2220 'start_states': self.start_states, 

2221 'end_states': self.end_states, 

2222 } 

2223 

2224 @classmethod 

2225 def deserialize(cls, data, memo): 

2226 tokens = data['tokens'] 

2227 states = { 

2228 state: {tokens[token]: ((Reduce, Rule.deserialize(arg, memo)) if action==1 else (Shift, arg)) 

2229 for token, (action, arg) in actions.items()} 

2230 for state, actions in data['states'].items() 

2231 } 

2232 return cls(states, data['start_states'], data['end_states']) 

2233 

2234class ParseTable(ParseTableBase['State']): 

2235 #-- 

2236 pass 

2237 

2238 

2239class IntParseTable(ParseTableBase[int]): 

2240 #-- 

2241 

2242 @classmethod 

2243 def from_ParseTable(cls, parse_table: ParseTable): 

2244 enum = list(parse_table.states) 

2245 state_to_idx: Dict['State', int] = {s:i for i,s in enumerate(enum)} 

2246 int_states = {} 

2247 

2248 for s, la in parse_table.states.items(): 

2249 la = {k:(v[0], state_to_idx[v[1]]) if v[0] is Shift else v 

2250 for k,v in la.items()} 

2251 int_states[ state_to_idx[s] ] = la 

2252 

2253 

2254 start_states = {start:state_to_idx[s] for start, s in parse_table.start_states.items()} 

2255 end_states = {start:state_to_idx[s] for start, s in parse_table.end_states.items()} 

2256 return cls(int_states, start_states, end_states) 

2257 

2258 

2259 

2260class ParseConf(Generic[StateT]): 

2261 __slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states' 

2262 

2263 parse_table: ParseTableBase[StateT] 

2264 callbacks: ParserCallbacks 

2265 start: str 

2266 

2267 start_state: StateT 

2268 end_state: StateT 

2269 states: Dict[StateT, Dict[str, tuple]] 

2270 

2271 def __init__(self, parse_table: ParseTableBase[StateT], callbacks: ParserCallbacks, start: str): 

2272 self.parse_table = parse_table 

2273 

2274 self.start_state = self.parse_table.start_states[start] 

2275 self.end_state = self.parse_table.end_states[start] 

2276 self.states = self.parse_table.states 

2277 

2278 self.callbacks = callbacks 

2279 self.start = start 

2280 

2281class ParserState(Generic[StateT]): 

2282 __slots__ = 'parse_conf', 'lexer', 'state_stack', 'value_stack' 

2283 

2284 parse_conf: ParseConf[StateT] 

2285 lexer: LexerThread 

2286 state_stack: List[StateT] 

2287 value_stack: list 

2288 

2289 def __init__(self, parse_conf: ParseConf[StateT], lexer: LexerThread, state_stack=None, value_stack=None): 

2290 self.parse_conf = parse_conf 

2291 self.lexer = lexer 

2292 self.state_stack = state_stack or [self.parse_conf.start_state] 

2293 self.value_stack = value_stack or [] 

2294 

2295 @property 

2296 def position(self) -> StateT: 

2297 return self.state_stack[-1] 

2298 

2299 ## 

2300 

2301 def __eq__(self, other) -> bool: 

2302 if not isinstance(other, ParserState): 

2303 return NotImplemented 

2304 return len(self.state_stack) == len(other.state_stack) and self.position == other.position 

2305 

2306 def __copy__(self): 

2307 return type(self)( 

2308 self.parse_conf, 

2309 self.lexer, ## 

2310 

2311 copy(self.state_stack), 

2312 deepcopy(self.value_stack), 

2313 ) 

2314 

2315 def copy(self) -> 'ParserState[StateT]': 

2316 return copy(self) 

2317 

2318 def feed_token(self, token: Token, is_end=False) -> Any: 

2319 state_stack = self.state_stack 

2320 value_stack = self.value_stack 

2321 states = self.parse_conf.states 

2322 end_state = self.parse_conf.end_state 

2323 callbacks = self.parse_conf.callbacks 

2324 

2325 while True: 

2326 state = state_stack[-1] 

2327 try: 

2328 action, arg = states[state][token.type] 

2329 except KeyError: 

2330 expected = {s for s in states[state].keys() if s.isupper()} 

2331 raise UnexpectedToken(token, expected, state=self, interactive_parser=None) 

2332 

2333 assert arg != end_state 

2334 

2335 if action is Shift: 

2336 ## 

2337 

2338 assert not is_end 

2339 state_stack.append(arg) 

2340 value_stack.append(token if token.type not in callbacks else callbacks[token.type](token)) 

2341 return 

2342 else: 

2343 ## 

2344 

2345 rule = arg 

2346 size = len(rule.expansion) 

2347 if size: 2347 ↛ 2352line 2347 didn't jump to line 2352, because the condition on line 2347 was never false

2348 s = value_stack[-size:] 

2349 del state_stack[-size:] 

2350 del value_stack[-size:] 

2351 else: 

2352 s = [] 

2353 

2354 value = callbacks[rule](s) if callbacks else s 

2355 

2356 _action, new_state = states[state_stack[-1]][rule.origin.name] 

2357 assert _action is Shift 

2358 state_stack.append(new_state) 

2359 value_stack.append(value) 

2360 

2361 if is_end and state_stack[-1] == end_state: 

2362 return value_stack[-1] 

2363 

2364 

2365class LALR_Parser(Serialize): 

2366 def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False): 

2367 analysis = LALR_Analyzer(parser_conf, debug=debug, strict=strict) 

2368 analysis.compute_lalr() 

2369 callbacks = parser_conf.callbacks 

2370 

2371 self._parse_table = analysis.parse_table 

2372 self.parser_conf = parser_conf 

2373 self.parser = _Parser(analysis.parse_table, callbacks, debug) 

2374 

2375 @classmethod 

2376 def deserialize(cls, data, memo, callbacks, debug=False): 

2377 inst = cls.__new__(cls) 

2378 inst._parse_table = IntParseTable.deserialize(data, memo) 

2379 inst.parser = _Parser(inst._parse_table, callbacks, debug) 

2380 return inst 

2381 

2382 def serialize(self, memo: Any = None) -> Dict[str, Any]: 

2383 return self._parse_table.serialize(memo) 

2384 

2385 def parse_interactive(self, lexer: LexerThread, start: str): 

2386 return self.parser.parse(lexer, start, start_interactive=True) 

2387 

2388 def parse(self, lexer, start, on_error=None): 

2389 try: 

2390 return self.parser.parse(lexer, start) 

2391 except UnexpectedInput as e: 

2392 if on_error is None: 2392 ↛ 2395line 2392 didn't jump to line 2395, because the condition on line 2392 was never false

2393 raise 

2394 

2395 while True: 

2396 if isinstance(e, UnexpectedCharacters): 

2397 s = e.interactive_parser.lexer_thread.state 

2398 p = s.line_ctr.char_pos 

2399 

2400 if not on_error(e): 

2401 raise e 

2402 

2403 if isinstance(e, UnexpectedCharacters): 

2404 ## 

2405 

2406 if p == s.line_ctr.char_pos: 

2407 s.line_ctr.feed(s.text[p:p+1]) 

2408 

2409 try: 

2410 return e.interactive_parser.resume_parse() 

2411 except UnexpectedToken as e2: 

2412 if (isinstance(e, UnexpectedToken) 

2413 and e.token.type == e2.token.type == '$END' 

2414 and e.interactive_parser == e2.interactive_parser): 

2415 ## 

2416 

2417 raise e2 

2418 e = e2 

2419 except UnexpectedCharacters as e2: 

2420 e = e2 

2421 

2422 

2423class _Parser: 

2424 parse_table: ParseTableBase 

2425 callbacks: ParserCallbacks 

2426 debug: bool 

2427 

2428 def __init__(self, parse_table: ParseTableBase, callbacks: ParserCallbacks, debug: bool=False): 

2429 self.parse_table = parse_table 

2430 self.callbacks = callbacks 

2431 self.debug = debug 

2432 

2433 def parse(self, lexer: LexerThread, start: str, value_stack=None, state_stack=None, start_interactive=False): 

2434 parse_conf = ParseConf(self.parse_table, self.callbacks, start) 

2435 parser_state = ParserState(parse_conf, lexer, state_stack, value_stack) 

2436 if start_interactive: 2436 ↛ 2437line 2436 didn't jump to line 2437, because the condition on line 2436 was never true

2437 return InteractiveParser(self, parser_state, parser_state.lexer) 

2438 return self.parse_from_state(parser_state) 

2439 

2440 

2441 def parse_from_state(self, state: ParserState, last_token: Optional[Token]=None): 

2442 #-- 

2443 try: 

2444 token = last_token 

2445 for token in state.lexer.lex(state): 

2446 assert token is not None 

2447 state.feed_token(token) 

2448 

2449 end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1) 

2450 return state.feed_token(end_token, True) 

2451 except UnexpectedInput as e: 2451 ↛ 2457line 2451 didn't jump to line 2457

2452 try: 

2453 e.interactive_parser = InteractiveParser(self, state, state.lexer) 

2454 except NameError: 

2455 pass 

2456 raise e 

2457 except Exception as e: 

2458 if self.debug: 

2459 print("") 

2460 print("STATE STACK DUMP") 

2461 print("----------------") 

2462 for i, s in enumerate(state.state_stack): 

2463 print('%d)' % i , s) 

2464 print("") 

2465 

2466 raise 

2467 

2468 

2469class InteractiveParser: 

2470 #-- 

2471 def __init__(self, parser, parser_state, lexer_thread: LexerThread): 

2472 self.parser = parser 

2473 self.parser_state = parser_state 

2474 self.lexer_thread = lexer_thread 

2475 self.result = None 

2476 

2477 @property 

2478 def lexer_state(self) -> LexerThread: 

2479 warnings.warn("lexer_state will be removed in subsequent releases. Use lexer_thread instead.", DeprecationWarning) 

2480 return self.lexer_thread 

2481 

2482 def feed_token(self, token: Token): 

2483 #-- 

2484 return self.parser_state.feed_token(token, token.type == '$END') 

2485 

2486 def iter_parse(self) -> Iterator[Token]: 

2487 #-- 

2488 for token in self.lexer_thread.lex(self.parser_state): 

2489 yield token 

2490 self.result = self.feed_token(token) 

2491 

2492 def exhaust_lexer(self) -> List[Token]: 

2493 #-- 

2494 return list(self.iter_parse()) 

2495 

2496 

2497 def feed_eof(self, last_token=None): 

2498 #-- 

2499 eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else self.lexer_thread._Token('$END', '', 0, 1, 1) 

2500 return self.feed_token(eof) 

2501 

2502 

2503 def __copy__(self): 

2504 #-- 

2505 return type(self)( 

2506 self.parser, 

2507 copy(self.parser_state), 

2508 copy(self.lexer_thread), 

2509 ) 

2510 

2511 def copy(self): 

2512 return copy(self) 

2513 

2514 def __eq__(self, other): 

2515 if not isinstance(other, InteractiveParser): 

2516 return False 

2517 

2518 return self.parser_state == other.parser_state and self.lexer_thread == other.lexer_thread 

2519 

2520 def as_immutable(self): 

2521 #-- 

2522 p = copy(self) 

2523 return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_thread) 

2524 

2525 def pretty(self): 

2526 #-- 

2527 out = ["Parser choices:"] 

2528 for k, v in self.choices().items(): 

2529 out.append('\t- %s -> %r' % (k, v)) 

2530 out.append('stack size: %s' % len(self.parser_state.state_stack)) 

2531 return '\n'.join(out) 

2532 

2533 def choices(self): 

2534 #-- 

2535 return self.parser_state.parse_conf.parse_table.states[self.parser_state.position] 

2536 

2537 def accepts(self): 

2538 #-- 

2539 accepts = set() 

2540 conf_no_callbacks = copy(self.parser_state.parse_conf) 

2541 ## 

2542 

2543 ## 

2544 

2545 conf_no_callbacks.callbacks = {} 

2546 for t in self.choices(): 

2547 if t.isupper(): ## 

2548 

2549 new_cursor = copy(self) 

2550 new_cursor.parser_state.parse_conf = conf_no_callbacks 

2551 try: 

2552 new_cursor.feed_token(self.lexer_thread._Token(t, '')) 

2553 except UnexpectedToken: 

2554 pass 

2555 else: 

2556 accepts.add(t) 

2557 return accepts 

2558 

2559 def resume_parse(self): 

2560 #-- 

2561 return self.parser.parse_from_state(self.parser_state, last_token=self.lexer_thread.state.last_token) 

2562 

2563 

2564 

2565class ImmutableInteractiveParser(InteractiveParser): 

2566 #-- 

2567 

2568 result = None 

2569 

2570 def __hash__(self): 

2571 return hash((self.parser_state, self.lexer_thread)) 

2572 

2573 def feed_token(self, token): 

2574 c = copy(self) 

2575 c.result = InteractiveParser.feed_token(c, token) 

2576 return c 

2577 

2578 def exhaust_lexer(self): 

2579 #-- 

2580 cursor = self.as_mutable() 

2581 cursor.exhaust_lexer() 

2582 return cursor.as_immutable() 

2583 

2584 def as_mutable(self): 

2585 #-- 

2586 p = copy(self) 

2587 return InteractiveParser(p.parser, p.parser_state, p.lexer_thread) 

2588 

2589 

2590 

2591def _wrap_lexer(lexer_class): 

2592 future_interface = getattr(lexer_class, '__future_interface__', False) 

2593 if future_interface: 

2594 return lexer_class 

2595 else: 

2596 class CustomLexerWrapper(Lexer): 

2597 def __init__(self, lexer_conf): 

2598 self.lexer = lexer_class(lexer_conf) 

2599 def lex(self, lexer_state, parser_state): 

2600 return self.lexer.lex(lexer_state.text) 

2601 return CustomLexerWrapper 

2602 

2603 

2604def _deserialize_parsing_frontend(data, memo, lexer_conf, callbacks, options): 

2605 parser_conf = ParserConf.deserialize(data['parser_conf'], memo) 

2606 cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser 

2607 parser = cls.deserialize(data['parser'], memo, callbacks, options.debug) 

2608 parser_conf.callbacks = callbacks 

2609 return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser) 

2610 

2611 

2612_parser_creators: 'Dict[str, Callable[[LexerConf, Any, Any], Any]]' = {} 

2613 

2614 

2615class ParsingFrontend(Serialize): 

2616 __serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser' 

2617 

2618 lexer_conf: LexerConf 

2619 parser_conf: ParserConf 

2620 options: Any 

2621 

2622 def __init__(self, lexer_conf: LexerConf, parser_conf: ParserConf, options, parser=None): 

2623 self.parser_conf = parser_conf 

2624 self.lexer_conf = lexer_conf 

2625 self.options = options 

2626 

2627 ## 

2628 

2629 if parser: ## 2629 ↛ 2633line 2629 didn't jump to line 2633, because the condition on line 2629 was never false

2630 

2631 self.parser = parser 

2632 else: 

2633 create_parser = _parser_creators.get(parser_conf.parser_type) 

2634 assert create_parser is not None, "{} is not supported in standalone mode".format( 

2635 parser_conf.parser_type 

2636 ) 

2637 self.parser = create_parser(lexer_conf, parser_conf, options) 

2638 

2639 ## 

2640 

2641 lexer_type = lexer_conf.lexer_type 

2642 self.skip_lexer = False 

2643 if lexer_type in ('dynamic', 'dynamic_complete'): 2643 ↛ 2644line 2643 didn't jump to line 2644, because the condition on line 2643 was never true

2644 assert lexer_conf.postlex is None 

2645 self.skip_lexer = True 

2646 return 

2647 

2648 if isinstance(lexer_type, type): 2648 ↛ 2649line 2648 didn't jump to line 2649, because the condition on line 2648 was never true

2649 assert issubclass(lexer_type, Lexer) 

2650 self.lexer = _wrap_lexer(lexer_type)(lexer_conf) 

2651 elif isinstance(lexer_type, str): 2651 ↛ 2658line 2651 didn't jump to line 2658, because the condition on line 2651 was never false

2652 create_lexer = { 

2653 'basic': create_basic_lexer, 

2654 'contextual': create_contextual_lexer, 

2655 }[lexer_type] 

2656 self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex, options) 

2657 else: 

2658 raise TypeError("Bad value for lexer_type: {lexer_type}") 

2659 

2660 if lexer_conf.postlex: 2660 ↛ 2661line 2660 didn't jump to line 2661, because the condition on line 2660 was never true

2661 self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex) 

2662 

2663 def _verify_start(self, start=None): 

2664 if start is None: 2664 ↛ 2669line 2664 didn't jump to line 2669, because the condition on line 2664 was never false

2665 start_decls = self.parser_conf.start 

2666 if len(start_decls) > 1: 2666 ↛ 2667line 2666 didn't jump to line 2667, because the condition on line 2666 was never true

2667 raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls) 

2668 start ,= start_decls 

2669 elif start not in self.parser_conf.start: 

2670 raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start)) 

2671 return start 

2672 

2673 def _make_lexer_thread(self, text: str) -> Union[str, LexerThread]: 

2674 cls = (self.options and self.options._plugins.get('LexerThread')) or LexerThread 

2675 return text if self.skip_lexer else cls.from_text(self.lexer, text) 

2676 

2677 def parse(self, text: str, start=None, on_error=None): 

2678 chosen_start = self._verify_start(start) 

2679 kw = {} if on_error is None else {'on_error': on_error} 

2680 stream = self._make_lexer_thread(text) 

2681 return self.parser.parse(stream, chosen_start, **kw) 

2682 

2683 def parse_interactive(self, text: Optional[str]=None, start=None): 

2684 ## 

2685 

2686 ## 

2687 

2688 chosen_start = self._verify_start(start) 

2689 if self.parser_conf.parser_type != 'lalr': 

2690 raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ") 

2691 stream = self._make_lexer_thread(text) ## 

2692 

2693 return self.parser.parse_interactive(stream, chosen_start) 

2694 

2695 

2696def _validate_frontend_args(parser, lexer) -> None: 

2697 assert_config(parser, ('lalr', 'earley', 'cyk')) 

2698 if not isinstance(lexer, type): ## 2698 ↛ exitline 2698 didn't return from function '_validate_frontend_args', because the condition on line 2698 was never false

2699 

2700 expected = { 

2701 'lalr': ('basic', 'contextual'), 

2702 'earley': ('basic', 'dynamic', 'dynamic_complete'), 

2703 'cyk': ('basic', ), 

2704 }[parser] 

2705 assert_config(lexer, expected, 'Parser %r does not support lexer %%r, expected one of %%s' % parser) 

2706 

2707 

2708def _get_lexer_callbacks(transformer, terminals): 

2709 result = {} 

2710 for terminal in terminals: 

2711 callback = getattr(transformer, terminal.name, None) 

2712 if callback is not None: 2712 ↛ 2713line 2712 didn't jump to line 2713, because the condition on line 2712 was never true

2713 result[terminal.name] = callback 

2714 return result 

2715 

2716class PostLexConnector: 

2717 def __init__(self, lexer, postlexer): 

2718 self.lexer = lexer 

2719 self.postlexer = postlexer 

2720 

2721 def lex(self, lexer_state, parser_state): 

2722 i = self.lexer.lex(lexer_state, parser_state) 

2723 return self.postlexer.process(i) 

2724 

2725 

2726 

2727def create_basic_lexer(lexer_conf, parser, postlex, options) -> BasicLexer: 

2728 cls = (options and options._plugins.get('BasicLexer')) or BasicLexer 

2729 return cls(lexer_conf) 

2730 

2731def create_contextual_lexer(lexer_conf: LexerConf, parser, postlex, options) -> ContextualLexer: 

2732 cls = (options and options._plugins.get('ContextualLexer')) or ContextualLexer 

2733 parse_table: ParseTableBase[int] = parser._parse_table 

2734 states: Dict[int, Collection[str]] = {idx:list(t.keys()) for idx, t in parse_table.states.items()} 

2735 always_accept: Collection[str] = postlex.always_accept if postlex else () 

2736 return cls(lexer_conf, states, always_accept=always_accept) 

2737 

2738def create_lalr_parser(lexer_conf: LexerConf, parser_conf: ParserConf, options=None) -> LALR_Parser: 

2739 debug = options.debug if options else False 

2740 strict = options.strict if options else False 

2741 cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser 

2742 return cls(parser_conf, debug=debug, strict=strict) 

2743 

2744_parser_creators['lalr'] = create_lalr_parser 

2745 

2746 

2747 

2748 

2749class PostLex(ABC): 

2750 @abstractmethod 

2751 def process(self, stream: Iterator[Token]) -> Iterator[Token]: 

2752 return stream 

2753 

2754 always_accept: Iterable[str] = () 

2755 

2756class LarkOptions(Serialize): 

2757 #-- 

2758 

2759 start: List[str] 

2760 debug: bool 

2761 strict: bool 

2762 transformer: 'Optional[Transformer]' 

2763 propagate_positions: Union[bool, str] 

2764 maybe_placeholders: bool 

2765 cache: Union[bool, str] 

2766 regex: bool 

2767 g_regex_flags: int 

2768 keep_all_tokens: bool 

2769 tree_class: Optional[Callable[[str, List], Any]] 

2770 parser: _ParserArgType 

2771 lexer: _LexerArgType 

2772 ambiguity: 'Literal["auto", "resolve", "explicit", "forest"]' 

2773 postlex: Optional[PostLex] 

2774 priority: 'Optional[Literal["auto", "normal", "invert"]]' 

2775 lexer_callbacks: Dict[str, Callable[[Token], Token]] 

2776 use_bytes: bool 

2777 ordered_sets: bool 

2778 edit_terminals: Optional[Callable[[TerminalDef], TerminalDef]] 

2779 import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]' 

2780 source_path: Optional[str] 

2781 

2782 OPTIONS_DOC = r""" 

2783 **=== General Options ===** 

2784 

2785 start 

2786 The start symbol. Either a string, or a list of strings for multiple possible starts (Default: "start") 

2787 debug 

2788 Display debug information and extra warnings. Use only when debugging (Default: ``False``) 

2789 When used with Earley, it generates a forest graph as "sppf.png", if 'dot' is installed. 

2790 strict 

2791 Throw an exception on any potential ambiguity, including shift/reduce conflicts, and regex collisions. 

2792 transformer 

2793 Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster) 

2794 propagate_positions 

2795 Propagates positional attributes into the 'meta' attribute of all tree branches. 

2796 Sets attributes: (line, column, end_line, end_column, start_pos, end_pos, 

2797 container_line, container_column, container_end_line, container_end_column) 

2798 Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating. 

2799 maybe_placeholders 

2800 When ``True``, the ``[]`` operator returns ``None`` when not matched. 

2801 When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all. 

2802 (default= ``True``) 

2803 cache 

2804 Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now. 

2805 

2806 - When ``False``, does nothing (default) 

2807 - When ``True``, caches to a temporary file in the local directory 

2808 - When given a string, caches to the path pointed by the string 

2809 regex 

2810 When True, uses the ``regex`` module instead of the stdlib ``re``. 

2811 g_regex_flags 

2812 Flags that are applied to all terminals (both regex and strings) 

2813 keep_all_tokens 

2814 Prevent the tree builder from automagically removing "punctuation" tokens (Default: ``False``) 

2815 tree_class 

2816 Lark will produce trees comprised of instances of this class instead of the default ``lark.Tree``. 

2817 

2818 **=== Algorithm Options ===** 

2819 

2820 parser 

2821 Decides which parser engine to use. Accepts "earley" or "lalr". (Default: "earley"). 

2822 (there is also a "cyk" option for legacy) 

2823 lexer 

2824 Decides whether or not to use a lexer stage 

2825 

2826 - "auto" (default): Choose for me based on the parser 

2827 - "basic": Use a basic lexer 

2828 - "contextual": Stronger lexer (only works with parser="lalr") 

2829 - "dynamic": Flexible and powerful (only with parser="earley") 

2830 - "dynamic_complete": Same as dynamic, but tries *every* variation of tokenizing possible. 

2831 ambiguity 

2832 Decides how to handle ambiguity in the parse. Only relevant if parser="earley" 

2833 

2834 - "resolve": The parser will automatically choose the simplest derivation 

2835 (it chooses consistently: greedy for tokens, non-greedy for rules) 

2836 - "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest). 

2837 - "forest": The parser will return the root of the shared packed parse forest. 

2838 

2839 **=== Misc. / Domain Specific Options ===** 

2840 

2841 postlex 

2842 Lexer post-processing (Default: ``None``) Only works with the basic and contextual lexers. 

2843 priority 

2844 How priorities should be evaluated - "auto", ``None``, "normal", "invert" (Default: "auto") 

2845 lexer_callbacks 

2846 Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution. 

2847 use_bytes 

2848 Accept an input of type ``bytes`` instead of ``str``. 

2849 ordered_sets 

2850 Should Earley use ordered-sets to achieve stable output (~10% slower than regular sets. Default: True) 

2851 edit_terminals 

2852 A callback for editing the terminals before parse. 

2853 import_paths 

2854 A List of either paths or loader functions to specify from where grammars are imported 

2855 source_path 

2856 Override the source of from where the grammar was loaded. Useful for relative imports and unconventional grammar loading 

2857 **=== End of Options ===** 

2858 """ 

2859 if __doc__: 2859 ↛ 2860line 2859 didn't jump to line 2860, because the condition on line 2859 was never true

2860 __doc__ += OPTIONS_DOC 

2861 

2862 

2863 ## 

2864 

2865 ## 

2866 

2867 ## 

2868 

2869 ## 

2870 

2871 ## 

2872 

2873 ## 

2874 

2875 _defaults: Dict[str, Any] = { 

2876 'debug': False, 

2877 'strict': False, 

2878 'keep_all_tokens': False, 

2879 'tree_class': None, 

2880 'cache': False, 

2881 'postlex': None, 

2882 'parser': 'earley', 

2883 'lexer': 'auto', 

2884 'transformer': None, 

2885 'start': 'start', 

2886 'priority': 'auto', 

2887 'ambiguity': 'auto', 

2888 'regex': False, 

2889 'propagate_positions': False, 

2890 'lexer_callbacks': {}, 

2891 'maybe_placeholders': True, 

2892 'edit_terminals': None, 

2893 'g_regex_flags': 0, 

2894 'use_bytes': False, 

2895 'ordered_sets': True, 

2896 'import_paths': [], 

2897 'source_path': None, 

2898 '_plugins': {}, 

2899 } 

2900 

2901 def __init__(self, options_dict: Dict[str, Any]) -> None: 

2902 o = dict(options_dict) 

2903 

2904 options = {} 

2905 for name, default in self._defaults.items(): 

2906 if name in o: 2906 ↛ 2911line 2906 didn't jump to line 2911, because the condition on line 2906 was never false

2907 value = o.pop(name) 

2908 if isinstance(default, bool) and name not in ('cache', 'use_bytes', 'propagate_positions'): 

2909 value = bool(value) 

2910 else: 

2911 value = default 

2912 

2913 options[name] = value 

2914 

2915 if isinstance(options['start'], str): 2915 ↛ 2916line 2915 didn't jump to line 2916, because the condition on line 2915 was never true

2916 options['start'] = [options['start']] 

2917 

2918 self.__dict__['options'] = options 

2919 

2920 

2921 assert_config(self.parser, ('earley', 'lalr', 'cyk', None)) 

2922 

2923 if self.parser == 'earley' and self.transformer: 2923 ↛ 2924line 2923 didn't jump to line 2924, because the condition on line 2923 was never true

2924 raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm. ' 

2925 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)') 

2926 

2927 if o: 2927 ↛ 2928line 2927 didn't jump to line 2928, because the condition on line 2927 was never true

2928 raise ConfigurationError("Unknown options: %s" % o.keys()) 

2929 

2930 def __getattr__(self, name: str) -> Any: 

2931 try: 

2932 return self.__dict__['options'][name] 

2933 except KeyError as e: 

2934 raise AttributeError(e) 

2935 

2936 def __setattr__(self, name: str, value: str) -> None: 

2937 assert_config(name, self.options.keys(), "%r isn't a valid option. Expected one of: %s") 

2938 self.options[name] = value 

2939 

2940 def serialize(self, memo = None) -> Dict[str, Any]: 

2941 return self.options 

2942 

2943 @classmethod 

2944 def deserialize(cls, data: Dict[str, Any], memo: Dict[int, Union[TerminalDef, Rule]]) -> "LarkOptions": 

2945 return cls(data) 

2946 

2947 

2948## 

2949 

2950## 

2951 

2952_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'lexer_callbacks', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class', '_plugins'} 

2953 

2954_VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None) 

2955_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest') 

2956 

2957 

2958_T = TypeVar('_T', bound="Lark") 

2959 

2960class Lark(Serialize): 

2961 #-- 

2962 

2963 source_path: str 

2964 source_grammar: str 

2965 grammar: 'Grammar' 

2966 options: LarkOptions 

2967 lexer: Lexer 

2968 parser: 'ParsingFrontend' 

2969 terminals: Collection[TerminalDef] 

2970 

2971 def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None: 

2972 self.options = LarkOptions(options) 

2973 re_module: types.ModuleType 

2974 

2975 ## 

2976 

2977 use_regex = self.options.regex 

2978 if use_regex: 

2979 if _has_regex: 

2980 re_module = regex 

2981 else: 

2982 raise ImportError('`regex` module must be installed if calling `Lark(regex=True)`.') 

2983 else: 

2984 re_module = re 

2985 

2986 ## 

2987 

2988 if self.options.source_path is None: 

2989 try: 

2990 self.source_path = grammar.name ## 

2991 

2992 except AttributeError: 

2993 self.source_path = '<string>' 

2994 else: 

2995 self.source_path = self.options.source_path 

2996 

2997 ## 

2998 

2999 try: 

3000 read = grammar.read ## 

3001 

3002 except AttributeError: 

3003 pass 

3004 else: 

3005 grammar = read() 

3006 

3007 cache_fn = None 

3008 cache_sha256 = None 

3009 if isinstance(grammar, str): 

3010 self.source_grammar = grammar 

3011 if self.options.use_bytes: 

3012 if not isascii(grammar): 

3013 raise ConfigurationError("Grammar must be ascii only, when use_bytes=True") 

3014 

3015 if self.options.cache: 

3016 if self.options.parser != 'lalr': 

3017 raise ConfigurationError("cache only works with parser='lalr' for now") 

3018 

3019 unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals', '_plugins') 

3020 options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable) 

3021 from . import __version__ 

3022 s = grammar + options_str + __version__ + str(sys.version_info[:2]) 

3023 cache_sha256 = sha256_digest(s) 

3024 

3025 if isinstance(self.options.cache, str): 

3026 cache_fn = self.options.cache 

3027 else: 

3028 if self.options.cache is not True: 

3029 raise ConfigurationError("cache argument must be bool or str") 

3030 

3031 try: 

3032 username = getpass.getuser() 

3033 except Exception: 

3034 ## 

3035 

3036 ## 

3037 

3038 ## 

3039 

3040 username = "unknown" 

3041 

3042 cache_fn = tempfile.gettempdir() + "/.lark_cache_%s_%s_%s_%s.tmp" % (username, cache_sha256, *sys.version_info[:2]) 

3043 

3044 old_options = self.options 

3045 try: 

3046 with FS.open(cache_fn, 'rb') as f: 

3047 logger.debug('Loading grammar from cache: %s', cache_fn) 

3048 ## 

3049 

3050 for name in (set(options) - _LOAD_ALLOWED_OPTIONS): 

3051 del options[name] 

3052 file_sha256 = f.readline().rstrip(b'\n') 

3053 cached_used_files = pickle.load(f) 

3054 if file_sha256 == cache_sha256.encode('utf8') and verify_used_files(cached_used_files): 

3055 cached_parser_data = pickle.load(f) 

3056 self._load(cached_parser_data, **options) 

3057 return 

3058 except FileNotFoundError: 

3059 ## 

3060 

3061 pass 

3062 except Exception: ## 

3063 

3064 logger.exception("Failed to load Lark from cache: %r. We will try to carry on.", cache_fn) 

3065 

3066 ## 

3067 

3068 ## 

3069 

3070 self.options = old_options 

3071 

3072 

3073 ## 

3074 

3075 self.grammar, used_files = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens) 

3076 else: 

3077 assert isinstance(grammar, Grammar) 

3078 self.grammar = grammar 

3079 

3080 

3081 if self.options.lexer == 'auto': 

3082 if self.options.parser == 'lalr': 

3083 self.options.lexer = 'contextual' 

3084 elif self.options.parser == 'earley': 

3085 if self.options.postlex is not None: 

3086 logger.info("postlex can't be used with the dynamic lexer, so we use 'basic' instead. " 

3087 "Consider using lalr with contextual instead of earley") 

3088 self.options.lexer = 'basic' 

3089 else: 

3090 self.options.lexer = 'dynamic' 

3091 elif self.options.parser == 'cyk': 

3092 self.options.lexer = 'basic' 

3093 else: 

3094 assert False, self.options.parser 

3095 lexer = self.options.lexer 

3096 if isinstance(lexer, type): 

3097 assert issubclass(lexer, Lexer) ## 

3098 

3099 else: 

3100 assert_config(lexer, ('basic', 'contextual', 'dynamic', 'dynamic_complete')) 

3101 if self.options.postlex is not None and 'dynamic' in lexer: 

3102 raise ConfigurationError("Can't use postlex with a dynamic lexer. Use basic or contextual instead") 

3103 

3104 if self.options.ambiguity == 'auto': 

3105 if self.options.parser == 'earley': 

3106 self.options.ambiguity = 'resolve' 

3107 else: 

3108 assert_config(self.options.parser, ('earley', 'cyk'), "%r doesn't support disambiguation. Use one of these parsers instead: %s") 

3109 

3110 if self.options.priority == 'auto': 

3111 self.options.priority = 'normal' 

3112 

3113 if self.options.priority not in _VALID_PRIORITY_OPTIONS: 

3114 raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS)) 

3115 if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS: 

3116 raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS)) 

3117 

3118 if self.options.parser is None: 

3119 terminals_to_keep = '*' 

3120 elif self.options.postlex is not None: 

3121 terminals_to_keep = set(self.options.postlex.always_accept) 

3122 else: 

3123 terminals_to_keep = set() 

3124 

3125 ## 

3126 

3127 self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start, terminals_to_keep) 

3128 

3129 if self.options.edit_terminals: 

3130 for t in self.terminals: 

3131 self.options.edit_terminals(t) 

3132 

3133 self._terminals_dict = {t.name: t for t in self.terminals} 

3134 

3135 ## 

3136 

3137 if self.options.priority == 'invert': 

3138 for rule in self.rules: 

3139 if rule.options.priority is not None: 

3140 rule.options.priority = -rule.options.priority 

3141 for term in self.terminals: 

3142 term.priority = -term.priority 

3143 ## 

3144 

3145 ## 

3146 

3147 ## 

3148 

3149 elif self.options.priority is None: 

3150 for rule in self.rules: 

3151 if rule.options.priority is not None: 

3152 rule.options.priority = None 

3153 for term in self.terminals: 

3154 term.priority = 0 

3155 

3156 ## 

3157 

3158 self.lexer_conf = LexerConf( 

3159 self.terminals, re_module, self.ignore_tokens, self.options.postlex, 

3160 self.options.lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes, strict=self.options.strict 

3161 ) 

3162 

3163 if self.options.parser: 

3164 self.parser = self._build_parser() 

3165 elif lexer: 

3166 self.lexer = self._build_lexer() 

3167 

3168 if cache_fn: 

3169 logger.debug('Saving grammar to cache: %s', cache_fn) 

3170 try: 

3171 with FS.open(cache_fn, 'wb') as f: 

3172 assert cache_sha256 is not None 

3173 f.write(cache_sha256.encode('utf8') + b'\n') 

3174 pickle.dump(used_files, f) 

3175 self.save(f, _LOAD_ALLOWED_OPTIONS) 

3176 except IOError as e: 

3177 logger.exception("Failed to save Lark to cache: %r.", cache_fn, e) 

3178 

3179 if __doc__: 3179 ↛ 3180line 3179 didn't jump to line 3180, because the condition on line 3179 was never true

3180 __doc__ += "\n\n" + LarkOptions.OPTIONS_DOC 

3181 

3182 __serialize_fields__ = 'parser', 'rules', 'options' 

3183 

3184 def _build_lexer(self, dont_ignore: bool=False) -> BasicLexer: 

3185 lexer_conf = self.lexer_conf 

3186 if dont_ignore: 

3187 from copy import copy 

3188 lexer_conf = copy(lexer_conf) 

3189 lexer_conf.ignore = () 

3190 return BasicLexer(lexer_conf) 

3191 

3192 def _prepare_callbacks(self) -> None: 

3193 self._callbacks = {} 

3194 ## 

3195 

3196 if self.options.ambiguity != 'forest': 3196 ↛ 3205line 3196 didn't jump to line 3205, because the condition on line 3196 was never false

3197 self._parse_tree_builder = ParseTreeBuilder( 

3198 self.rules, 

3199 self.options.tree_class or Tree, 

3200 self.options.propagate_positions, 

3201 self.options.parser != 'lalr' and self.options.ambiguity == 'explicit', 

3202 self.options.maybe_placeholders 

3203 ) 

3204 self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer) 

3205 self._callbacks.update(_get_lexer_callbacks(self.options.transformer, self.terminals)) 

3206 

3207 def _build_parser(self) -> "ParsingFrontend": 

3208 self._prepare_callbacks() 

3209 _validate_frontend_args(self.options.parser, self.options.lexer) 

3210 parser_conf = ParserConf(self.rules, self._callbacks, self.options.start) 

3211 return _construct_parsing_frontend( 

3212 self.options.parser, 

3213 self.options.lexer, 

3214 self.lexer_conf, 

3215 parser_conf, 

3216 options=self.options 

3217 ) 

3218 

3219 def save(self, f, exclude_options: Collection[str] = ()) -> None: 

3220 #-- 

3221 if self.options.parser != 'lalr': 

3222 raise NotImplementedError("Lark.save() is only implemented for the LALR(1) parser.") 

3223 data, m = self.memo_serialize([TerminalDef, Rule]) 

3224 if exclude_options: 

3225 data["options"] = {n: v for n, v in data["options"].items() if n not in exclude_options} 

3226 pickle.dump({'data': data, 'memo': m}, f, protocol=pickle.HIGHEST_PROTOCOL) 

3227 

3228 @classmethod 

3229 def load(cls: Type[_T], f) -> _T: 

3230 #-- 

3231 inst = cls.__new__(cls) 

3232 return inst._load(f) 

3233 

3234 def _deserialize_lexer_conf(self, data: Dict[str, Any], memo: Dict[int, Union[TerminalDef, Rule]], options: LarkOptions) -> LexerConf: 

3235 lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo) 

3236 lexer_conf.callbacks = options.lexer_callbacks or {} 

3237 lexer_conf.re_module = regex if options.regex else re 

3238 lexer_conf.use_bytes = options.use_bytes 

3239 lexer_conf.g_regex_flags = options.g_regex_flags 

3240 lexer_conf.skip_validation = True 

3241 lexer_conf.postlex = options.postlex 

3242 return lexer_conf 

3243 

3244 def _load(self: _T, f: Any, **kwargs) -> _T: 

3245 if isinstance(f, dict): 3245 ↛ 3248line 3245 didn't jump to line 3248, because the condition on line 3245 was never false

3246 d = f 

3247 else: 

3248 d = pickle.load(f) 

3249 memo_json = d['memo'] 

3250 data = d['data'] 

3251 

3252 assert memo_json 

3253 memo = SerializeMemoizer.deserialize(memo_json, {'Rule': Rule, 'TerminalDef': TerminalDef}, {}) 

3254 options = dict(data['options']) 

3255 if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults): 3255 ↛ 3256line 3255 didn't jump to line 3256, because the condition on line 3255 was never true

3256 raise ConfigurationError("Some options are not allowed when loading a Parser: {}" 

3257 .format(set(kwargs) - _LOAD_ALLOWED_OPTIONS)) 

3258 options.update(kwargs) 

3259 self.options = LarkOptions.deserialize(options, memo) 

3260 self.rules = [Rule.deserialize(r, memo) for r in data['rules']] 

3261 self.source_path = '<deserialized>' 

3262 _validate_frontend_args(self.options.parser, self.options.lexer) 

3263 self.lexer_conf = self._deserialize_lexer_conf(data['parser'], memo, self.options) 

3264 self.terminals = self.lexer_conf.terminals 

3265 self._prepare_callbacks() 

3266 self._terminals_dict = {t.name: t for t in self.terminals} 

3267 self.parser = _deserialize_parsing_frontend( 

3268 data['parser'], 

3269 memo, 

3270 self.lexer_conf, 

3271 self._callbacks, 

3272 self.options, ## 

3273 

3274 ) 

3275 return self 

3276 

3277 @classmethod 

3278 def _load_from_dict(cls, data, memo, **kwargs): 

3279 inst = cls.__new__(cls) 

3280 return inst._load({'data': data, 'memo': memo}, **kwargs) 

3281 

3282 @classmethod 

3283 def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str]=None, **options) -> _T: 

3284 #-- 

3285 if rel_to: 

3286 basepath = os.path.dirname(rel_to) 

3287 grammar_filename = os.path.join(basepath, grammar_filename) 

3288 with open(grammar_filename, encoding='utf8') as f: 

3289 return cls(f, **options) 

3290 

3291 @classmethod 

3292 def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: 'Sequence[str]'=[""], **options) -> _T: 

3293 #-- 

3294 package_loader = FromPackageLoader(package, search_paths) 

3295 full_path, text = package_loader(None, grammar_path) 

3296 options.setdefault('source_path', full_path) 

3297 options.setdefault('import_paths', []) 

3298 options['import_paths'].append(package_loader) 

3299 return cls(text, **options) 

3300 

3301 def __repr__(self): 

3302 return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer) 

3303 

3304 

3305 def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]: 

3306 #-- 

3307 lexer: Lexer 

3308 if not hasattr(self, 'lexer') or dont_ignore: 

3309 lexer = self._build_lexer(dont_ignore) 

3310 else: 

3311 lexer = self.lexer 

3312 lexer_thread = LexerThread.from_text(lexer, text) 

3313 stream = lexer_thread.lex(None) 

3314 if self.options.postlex: 

3315 return self.options.postlex.process(stream) 

3316 return stream 

3317 

3318 def get_terminal(self, name: str) -> TerminalDef: 

3319 #-- 

3320 return self._terminals_dict[name] 

3321 

3322 def parse_interactive(self, text: Optional[str]=None, start: Optional[str]=None) -> 'InteractiveParser': 

3323 #-- 

3324 return self.parser.parse_interactive(text, start=start) 

3325 

3326 def parse(self, text: str, start: Optional[str]=None, on_error: 'Optional[Callable[[UnexpectedInput], bool]]'=None) -> 'ParseTree': 

3327 #-- 

3328 return self.parser.parse(text, start=start, on_error=on_error) 

3329 

3330 def scan(self, text: str, start: Optional[str]=None) -> Iterator[Tuple[Tuple[int, int], 'ParseTree']]: 

3331 #-- 

3332 if self.options.parser != 'lalr' or self.options.lexer != 'contextual': 

3333 raise ValueError("scan requires parser='lalr' and lexer='contextual'") 

3334 start_states = self.parser.parser._parse_table.start_states 

3335 if start is None: 

3336 if len(start_states) != 1: 

3337 raise ValueError("Need to specify start") 

3338 start, = start_states 

3339 start_state = start_states[start] 

3340 start_lex: BasicLexer = self.parser.lexer.lexers[start_state] 

3341 pos = 0 

3342 while True: 

3343 start_pos = start_lex.scanner.search(text, pos) 

3344 if start_pos is None: 

3345 break 

3346 valid_end = [] 

3347 ip = self.parse_interactive(text[start_pos:], start=start) 

3348 tokens = ip.lexer_thread.lex(ip.parser_state) 

3349 while True: 

3350 try: 

3351 token = next(tokens) 

3352 ip.feed_token(token) 

3353 except (UnexpectedInput, StopIteration): 

3354 break 

3355 if '$END' in ip.choices(): 

3356 valid_end.append((token, ip.copy())) 

3357 for (last, pot) in valid_end[::-1]: 

3358 try: 

3359 res = pot.feed_eof(last) 

3360 except UnexpectedInput: 

3361 continue 

3362 else: 

3363 yield ((start_pos, start_pos + last.end_pos), res) 

3364 pos = start_pos + last.end_pos 

3365 break 

3366 else: 

3367 pos = start_pos + 1 

3368 

3369 

3370 

3371class DedentError(LarkError): 

3372 pass 

3373 

3374class Indenter(PostLex, ABC): 

3375 paren_level: int 

3376 indent_level: List[int] 

3377 

3378 def __init__(self) -> None: 

3379 self.paren_level = 0 

3380 self.indent_level = [0] 

3381 assert self.tab_len > 0 

3382 

3383 def handle_NL(self, token: Token) -> Iterator[Token]: 

3384 if self.paren_level > 0: 

3385 return 

3386 

3387 yield token 

3388 

3389 indent_str = token.rsplit('\n', 1)[1] ## 

3390 

3391 indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len 

3392 

3393 if indent > self.indent_level[-1]: 

3394 self.indent_level.append(indent) 

3395 yield Token.new_borrow_pos(self.INDENT_type, indent_str, token) 

3396 else: 

3397 while indent < self.indent_level[-1]: 

3398 self.indent_level.pop() 

3399 yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token) 

3400 

3401 if indent != self.indent_level[-1]: 

3402 raise DedentError('Unexpected dedent to column %s. Expected dedent to %s' % (indent, self.indent_level[-1])) 

3403 

3404 def _process(self, stream): 

3405 for token in stream: 

3406 if token.type == self.NL_type: 

3407 yield from self.handle_NL(token) 

3408 else: 

3409 yield token 

3410 

3411 if token.type in self.OPEN_PAREN_types: 

3412 self.paren_level += 1 

3413 elif token.type in self.CLOSE_PAREN_types: 

3414 self.paren_level -= 1 

3415 assert self.paren_level >= 0 

3416 

3417 while len(self.indent_level) > 1: 

3418 self.indent_level.pop() 

3419 yield Token(self.DEDENT_type, '') 

3420 

3421 assert self.indent_level == [0], self.indent_level 

3422 

3423 def process(self, stream): 

3424 self.paren_level = 0 

3425 self.indent_level = [0] 

3426 return self._process(stream) 

3427 

3428 ## 

3429 

3430 @property 

3431 def always_accept(self): 

3432 return (self.NL_type,) 

3433 

3434 @property 

3435 @abstractmethod 

3436 def NL_type(self) -> str: 

3437 raise NotImplementedError() 

3438 

3439 @property 

3440 @abstractmethod 

3441 def OPEN_PAREN_types(self) -> List[str]: 

3442 raise NotImplementedError() 

3443 

3444 @property 

3445 @abstractmethod 

3446 def CLOSE_PAREN_types(self) -> List[str]: 

3447 raise NotImplementedError() 

3448 

3449 @property 

3450 @abstractmethod 

3451 def INDENT_type(self) -> str: 

3452 raise NotImplementedError() 

3453 

3454 @property 

3455 @abstractmethod 

3456 def DEDENT_type(self) -> str: 

3457 raise NotImplementedError() 

3458 

3459 @property 

3460 @abstractmethod 

3461 def tab_len(self) -> int: 

3462 raise NotImplementedError() 

3463 

3464 

3465class PythonIndenter(Indenter): 

3466 NL_type = '_NEWLINE' 

3467 OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE'] 

3468 CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE'] 

3469 INDENT_type = '_INDENT' 

3470 DEDENT_type = '_DEDENT' 

3471 tab_len = 8 

3472 

3473 

3474import pickle, zlib, base64 

3475DATA = ( 

3476{'parser': {'lexer_conf': {'terminals': [{'@': 0}, {'@': 1}, {'@': 2}, {'@': 3}, {'@': 4}, {'@': 5}, {'@': 6}, {'@': 7}, {'@': 8}], 'ignore': ['WS'], 'g_regex_flags': 0, 'use_bytes': False, 'lexer_type': 'contextual', '__type__': 'LexerConf'}, 'parser_conf': {'rules': [{'@': 9}, {'@': 10}, {'@': 11}, {'@': 12}, {'@': 13}, {'@': 14}, {'@': 15}, {'@': 16}, {'@': 17}, {'@': 18}, {'@': 19}, {'@': 20}], 'start': ['start'], 'parser_type': 'lalr', '__type__': 'ParserConf'}, 'parser': {'tokens': {0: '__argument_list_star_0', 1: 'COMMA', 2: 'RPAR', 3: 'argument', 4: 'key', 5: 'CNAME', 6: 'COLON', 7: 'PYTHON', 8: 'ident', 9: 'AT', 10: 'start', 11: 'ESCAPED_STRING', 12: 'value', 13: 'LPAR', 14: '$END', 15: 'argument_list'}, 'states': {0: {0: (0, 4), 1: (0, 1), 2: (1, {'@': 14})}, 1: {3: (0, 18), 4: (0, 3), 5: (0, 13), 2: (1, {'@': 13})}, 2: {}, 3: {6: (0, 11)}, 4: {1: (0, 6), 2: (1, {'@': 12})}, 5: {2: (1, {'@': 20}), 1: (1, {'@': 20})}, 6: {4: (0, 3), 3: (0, 5), 5: (0, 13), 2: (1, {'@': 11})}, 7: {2: (0, 14)}, 8: {7: (0, 17), 8: (0, 12)}, 9: {9: (0, 8), 10: (0, 2)}, 10: {2: (1, {'@': 16}), 1: (1, {'@': 16})}, 11: {11: (0, 15), 12: (0, 10)}, 12: {13: (0, 16)}, 13: {6: (1, {'@': 17})}, 14: {14: (1, {'@': 9})}, 15: {2: (1, {'@': 18}), 1: (1, {'@': 18})}, 16: {3: (0, 0), 4: (0, 3), 5: (0, 13), 15: (0, 7), 2: (1, {'@': 15})}, 17: {13: (1, {'@': 10})}, 18: {2: (1, {'@': 19}), 1: (1, {'@': 19})}}, 'start_states': {'start': 9}, 'end_states': {'start': 2}}, '__type__': 'ParsingFrontend'}, 'rules': [{'@': 9}, {'@': 10}, {'@': 11}, {'@': 12}, {'@': 13}, {'@': 14}, {'@': 15}, {'@': 16}, {'@': 17}, {'@': 18}, {'@': 19}, {'@': 20}], 'options': {'debug': False, 'strict': False, 'keep_all_tokens': False, 'tree_class': None, 'cache': False, 'postlex': None, 'parser': 'lalr', 'lexer': 'contextual', 'transformer': None, 'start': ['start'], 'priority': 'normal', 'ambiguity': 'auto', 'regex': False, 'propagate_positions': False, 'lexer_callbacks': {}, 'maybe_placeholders': False, 'edit_terminals': None, 'g_regex_flags': 0, 'use_bytes': False, 'ordered_sets': True, 'import_paths': [], 'source_path': None, '_plugins': {}}, '__type__': 'Lark'} 

3477) 

3478MEMO = ( 

3479{0: {'name': 'ESCAPED_STRING', 'pattern': {'value': '(?:\'.*?(?<!\\\\)(\\\\\\\\)*?\'|".*?(?<!\\\\)(\\\\\\\\)*?")', 'flags': [], 'raw': None, '_width': [2, 4294967295], '__type__': 'PatternRE'}, 'priority': 0, '__type__': 'TerminalDef'}, 1: {'name': 'WS', 'pattern': {'value': '(?:[ \t\x0c\r\n])+', 'flags': [], 'raw': None, '_width': [1, 4294967295], '__type__': 'PatternRE'}, 'priority': 0, '__type__': 'TerminalDef'}, 2: {'name': 'CNAME', 'pattern': {'value': '[A-Za-z_][A-Za-z0-9_]*', 'flags': [], 'raw': '/[A-Za-z_][A-Za-z0-9_]*/', '_width': [1, 4294967295], '__type__': 'PatternRE'}, 'priority': 0, '__type__': 'TerminalDef'}, 3: {'name': 'AT', 'pattern': {'value': '@', 'flags': [], 'raw': '"@"', '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 4: {'name': 'LPAR', 'pattern': {'value': '(', 'flags': [], 'raw': '"("', '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 5: {'name': 'RPAR', 'pattern': {'value': ')', 'flags': [], 'raw': '")"', '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 6: {'name': 'PYTHON', 'pattern': {'value': 'Python', 'flags': [], 'raw': '"Python"', '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 7: {'name': 'COMMA', 'pattern': {'value': ',', 'flags': [], 'raw': '","', '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 8: {'name': 'COLON', 'pattern': {'value': ':', 'flags': [], 'raw': '":"', '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 9: {'origin': {'name': Token('RULE', 'start'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'AT', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'ident', '__type__': 'NonTerminal'}, {'name': 'LPAR', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'argument_list', '__type__': 'NonTerminal'}, {'name': 'RPAR', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 10: {'origin': {'name': Token('RULE', 'ident'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'PYTHON', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 11: {'origin': {'name': Token('RULE', 'argument_list'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'argument', '__type__': 'NonTerminal'}, {'name': '__argument_list_star_0', '__type__': 'NonTerminal'}, {'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 12: {'origin': {'name': Token('RULE', 'argument_list'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'argument', '__type__': 'NonTerminal'}, {'name': '__argument_list_star_0', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 13: {'origin': {'name': Token('RULE', 'argument_list'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'argument', '__type__': 'NonTerminal'}, {'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}], 'order': 2, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 14: {'origin': {'name': Token('RULE', 'argument_list'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'argument', '__type__': 'NonTerminal'}], 'order': 3, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 15: {'origin': {'name': Token('RULE', 'argument_list'), '__type__': 'NonTerminal'}, 'expansion': [], 'order': 4, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (True,), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 16: {'origin': {'name': Token('RULE', 'argument'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'key', '__type__': 'NonTerminal'}, {'name': 'COLON', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'value', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 17: {'origin': {'name': Token('RULE', 'key'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'CNAME', 'filter_out': False, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 18: {'origin': {'name': Token('RULE', 'value'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'ESCAPED_STRING', 'filter_out': False, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 19: {'origin': {'name': '__argument_list_star_0', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'argument', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 20: {'origin': {'name': '__argument_list_star_0', '__type__': 'NonTerminal'}, 'expansion': [{'name': '__argument_list_star_0', '__type__': 'NonTerminal'}, {'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'argument', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}} 

3480) 

3481Shift = 0 

3482Reduce = 1 

3483def Lark_StandAlone(**kwargs): 

3484 return Lark._load_from_dict(DATA, MEMO, **kwargs)