Source code for blark.parse

"""
`blark parse` is a command-line utility to parse TwinCAT3 source code projects
and files.
"""
from __future__ import annotations

import argparse
import enum
import json
import logging
import pathlib
import sys
import typing
from dataclasses import dataclass
from typing import Generator, Optional, Sequence, Type, TypeVar, Union

import lark

from . import solution
from . import transform as tf
from . import util
from .input import BlarkCompositeSourceItem, BlarkSourceItem, load_file_by_name
from .typing import Preprocessor
from .util import AnyPath

try:
    import apischema
except ImportError:
    apischema = None

if typing.TYPE_CHECKING:
    from .summary import CodeSummary


logger = logging.getLogger(__name__)


DESCRIPTION = __doc__
AnyFile = Union[str, pathlib.Path]

_PARSER = None


class BlarkStartingRule(enum.Enum):
    """
    Grammar rules that parsing may start from.

    ``new_parser`` passes every member *name* to lark as a ``start`` rule
    when the caller does not give an explicit list, so the names here are
    expected to match rule names in the grammar file.
    """

    # Declaration order determines both the ``enum.auto()`` values and the
    # order of the default ``start`` list built by ``new_parser``.
    iec_source = enum.auto()
    action = enum.auto()
    data_type_declaration = enum.auto()
    function_block_method_declaration = enum.auto()
    function_block_property_declaration = enum.auto()
    function_block_type_declaration = enum.auto()
    function_declaration = enum.auto()
    global_var_declarations = enum.auto()
    interface_declaration = enum.auto()
    program_declaration = enum.auto()
    statement_list = enum.auto()
def new_parser(start: Optional[list[str]] = None, **kwargs) -> lark.Lark:
    """
    Build a fresh parser for TwinCAT flavor IEC61131-3 code.

    Parameters
    ----------
    start : list of str, optional
        Names of the grammar rules parsing may start from.  When omitted,
        the name of every :class:`BlarkStartingRule` member is used.
    **kwargs :
        Forwarded to lark.  See :class:`lark.lark.LarkOptions`.

    Returns
    -------
    lark.Lark
        A newly-constructed parser; nothing is cached here.
    """
    start_rules = start
    if start_rules is None:
        start_rules = [rule.name for rule in BlarkStartingRule]

    # Imported at call time rather than at module import time.
    from . import GRAMMAR_FILENAME

    lark_options = dict(
        parser="earley",
        maybe_placeholders=True,
        propagate_positions=True,
        start=start_rules,
    )
    return lark.Lark.open_from_package(
        "blark",
        GRAMMAR_FILENAME.name,
        **lark_options,
        **kwargs,
    )
def get_parser() -> lark.Lark:
    """
    Return the shared lark parser for TwinCAT flavor IEC61131-3 code.

    The parser is created with ``new_parser()`` on first use and stored in
    the module-level ``_PARSER``; later calls return that same instance.
    """
    global _PARSER

    if _PARSER is not None:
        return _PARSER

    _PARSER = new_parser()
    return _PARSER
# Preprocessors used by ``parse_source_code`` when the caller does not supply
# any.  Empty by default, so no preprocessing step is performed.
DEFAULT_PREPROCESSORS = tuple()
@dataclass
class ParseResult:
    """
    The outcome of parsing a single source code item.

    On success ``tree`` holds the lark parse tree and ``exception`` is
    ``None``.  On failure ``exception`` holds the error raised by lark and
    ``tree`` stays ``None``.
    """

    # The source code as it was handed to the parser entry point.
    source_code: str
    # The source item this result belongs to.
    item: BlarkSourceItem
    # ``source_code`` after preprocessors ran and comments were cleaned out.
    processed_source_code: str
    # Comment tokens found in the source code.
    comments: list[lark.Token]
    # Maps a line number of ``source_code`` to a line number of the file it
    # came from, when such a mapping is available.
    line_map: Optional[dict[int, int]] = None
    # File the source code is associated with.
    filename: Optional[pathlib.Path] = None
    # Error raised while parsing, if any.
    exception: Optional[Exception] = None
    # Parse tree produced by lark, if parsing succeeded.
    tree: Optional[lark.Tree] = None
    # The composite item this result's item is a part of, if any.
    parent: Optional[BlarkCompositeSourceItem] = None
    # Cached return value of ``transform()``.
    transformed: Optional[tf.SourceCode] = None

    @property
    def identifier(self) -> Optional[str]:
        """The identifier of ``item``, or ``None`` when there is no item."""
        if self.item is None:
            return None
        return self.item.identifier

    def transform(self) -> tf.SourceCode:
        """
        Transform the parse tree, caching the result on the instance.

        Returns
        -------
        tf.SourceCode
            The transformed source code.  Repeated calls return the same
            cached object.

        Raises
        ------
        ValueError
            If parsing failed and there is no tree to transform.
        """
        if self.tree is None:
            raise ValueError(
                f"Source code was not successfully parsed; cannot transform. "
                f"Exception was {type(self.exception).__name__}: {self.exception}"
            )

        if self.transformed is None:
            self.transformed = tf.transform(
                source_code=self.source_code,
                tree=self.tree,
                comments=self.comments,
                line_map=self.line_map,
                filename=self.filename,
            )
        return self.transformed

    def dump_source(self, fp=None) -> None:
        """
        Write the source code, with line numbers, to ``fp``.

        With a ``line_map``, each mapped line is written as
        ``"<code line> (<source line>) <text>"``; otherwise every line is
        written as ``"<line>: <text>"``.

        Parameters
        ----------
        fp : file-like object, optional
            Stream to write to.  Defaults to ``sys.stdout`` as it is *at call
            time*, so redirected or captured standard output is honored.
        """
        # Resolve the default here: a ``fp=sys.stdout`` default would be
        # bound once at import time and ignore any later redirection.
        if fp is None:
            fp = sys.stdout

        if self.line_map is not None:
            # NOTE: split("\n") and splitlines() differ wrt the final newline
            code_lines = dict(enumerate(self.source_code.split("\n"), 1))
            for code_lineno, source_lineno in self.line_map.items():
                line = code_lines[code_lineno]
                print(f"{code_lineno} ({source_lineno}) {line}", file=fp)
        else:
            for lineno, line in enumerate(self.source_code.splitlines(), 1):
                print(f"{lineno}: {line}", file=fp)
def parse_source_code(
    source_code: str,
    *,
    verbose: int = 0,
    fn: AnyPath = "unknown",
    preprocessors: Sequence[Preprocessor] = DEFAULT_PREPROCESSORS,
    parser: Optional[lark.Lark] = None,
    starting_rule: Optional[str] = None,
    line_map: Optional[dict[int, int]] = None,
    item: Optional[BlarkSourceItem] = None,
) -> ParseResult:
    """
    Parse source code into a ``ParseResult``.

    Parse errors raised by lark are not propagated; they are stored on the
    returned result's ``exception`` attribute instead.

    Parameters
    ----------
    source_code : str
        The source code text.

    verbose : int, optional
        Verbosity level for output. (deprecated; unused here)

    fn : pathlib.Path or str, optional
        The filename associated with the source code.

    preprocessors : list, optional
        Callable preprocessors to apply to the source code, in order.

    parser : lark.Lark, optional
        The parser instance to use.  Defaults to the global shared one from
        ``get_parser``.

    starting_rule : str, optional
        Grammar rule to start parsing from.  Defaults to ``iec_source`` when
        the parser supports it, otherwise to the parser's first start rule.

    line_map : dict of int to int, optional
        Line number mapping.  When given, it is passed to comment cleaning
        and used to translate the line number of an unexpected-input error.

    item : BlarkSourceItem, optional
        The source item to attach to the result.  One is created from
        ``source_code`` when omitted.

    Returns
    -------
    ParseResult
    """
    cleaned_source = source_code
    for preprocess in preprocessors:
        cleaned_source = preprocess(cleaned_source)

    comments, cleaned_source = util.find_and_clean_comments(
        cleaned_source,
        line_map=line_map,
    )

    active_parser = get_parser() if parser is None else parser

    if starting_rule is None:
        # NOTE: back-compat -> default to 'iec_source' here
        supported_rules = active_parser.options.start
        if "iec_source" in supported_rules:
            starting_rule = "iec_source"
        else:
            starting_rule = supported_rules[0]

    if item is None:
        item = BlarkSourceItem.from_code(
            source_code,
            grammar_rule=starting_rule,
        )

    result = ParseResult(
        item=item,
        source_code=source_code,
        processed_source_code=cleaned_source,
        line_map=line_map,
        comments=comments,
        filename=pathlib.Path(fn),
    )

    try:
        result.tree = active_parser.parse(cleaned_source, start=starting_rule)
    except lark.LarkError as err:
        # UnexpectedInput is a LarkError that carries a line number; report
        # it in terms of the original file when a mapping is available.
        if isinstance(err, lark.UnexpectedInput) and line_map:
            err.line = line_map.get(err.line, err.line)
        result.exception = err

    return result
def parse_single_file(fn: AnyPath, **kwargs) -> ParseResult:
    """
    Read the file ``fn`` and parse its contents.

    Any keyword arguments are forwarded to ``parse_source_code``.
    """
    return parse_source_code(util.get_source_code(fn), fn=fn, **kwargs)
def parse_project(
    tsproj_project: AnyFile,
    **kwargs,
) -> Generator[ParseResult, None, None]:
    """
    Parse an entire tsproj project file.

    Yields one ``ParseResult`` per parsed source item.  Keyword arguments
    are forwarded to ``parse_item``.
    """
    loaded_solution = solution.make_solution_from_files(tsproj_project)
    source_items = solution.get_blark_input_from_solution(loaded_solution)
    for source_item in source_items:
        yield from parse_item(source_item, **kwargs)
def parse_item(
    item: Union[BlarkSourceItem, BlarkCompositeSourceItem],
    **kwargs,
) -> Generator[ParseResult, None, None]:
    """
    Parse a source item, recursing into the parts of composite items.

    Parameters
    ----------
    item : BlarkSourceItem or BlarkCompositeSourceItem
        The item to parse.
    **kwargs :
        Forwarded to ``parse_source_code``.

    Yields
    ------
    ParseResult
        One result per non-composite item.  Results that come from a
        composite item have ``parent`` set to that composite and inherit its
        filename when they have none of their own.
    """
    if not isinstance(item, BlarkCompositeSourceItem):
        source, mapping = item.get_code_and_line_map()

        try:
            first_filename = list(item.get_filenames())[0]
        except IndexError:
            # No filename at all: nothing to parse if there are no lines either.
            if not item.lines:
                return
            first_filename = None

        parsed = parse_source_code(
            source,
            starting_rule=item.grammar_rule,
            line_map=mapping,
            fn=first_filename or "unknown",
            **kwargs,
        )
        parsed.item = item
        yield parsed
        return

    for part in item.parts:
        for child_result in parse_item(part, **kwargs):
            if not child_result.filename:
                child_result.filename = item.filename
            child_result.parent = item
            yield child_result
def parse(
    path: AnyPath,
    input_format: Optional[str] = None,
    **kwargs,
) -> Generator[ParseResult, None, None]:
    """
    Parse the given source code file (or all files from the given project).

    ``input_format`` is handed to ``load_file_by_name``; the remaining
    keyword arguments are forwarded to ``parse_item``.
    """
    loaded_items = load_file_by_name(path, input_format=input_format)
    for loaded_item in loaded_items:
        yield from parse_item(loaded_item, **kwargs)
def build_arg_parser(argparser=None):
    """
    Configure the command-line argument parser for ``blark parse``.

    Parameters
    ----------
    argparser : argparse.ArgumentParser, optional
        An existing parser to add the arguments to.  A new one is created
        when omitted.

    Returns
    -------
    argparse.ArgumentParser
        The configured parser.  Argument destinations match the keyword
        parameters of ``main``.
    """
    if argparser is None:
        argparser = argparse.ArgumentParser()

    argparser.description = DESCRIPTION
    argparser.formatter_class = argparse.RawTextHelpFormatter

    argparser.add_argument(
        "filename",
        type=str,
        help=(
            "Path to project, solution, source code file (.tsproj, .sln, "
            ".TcPOU, .TcGVL)"
        ),
    )

    # TODO: may eventually do file format checking
    argparser.add_argument(
        "-if",
        "--input-format",
        required=False,
        help=(
            "Load the provided files as the given type, overriding built-in "
            "filename extension mapping."
        ),
    )

    argparser.add_argument(
        "--verbose",
        "-v",
        action="count",
        default=0,
        help="Increase verbosity, up to -vvv",
    )

    argparser.add_argument(
        "--print-filename",
        action="store_true",
        help="Print filenames along with results",
    )

    argparser.add_argument(
        "--print-source",
        action="store_true",
        help="Dump the source code",
    )

    argparser.add_argument(
        "--print-tree",
        action="store_true",
        help="Dump the source code tree",
    )

    argparser.add_argument(
        "--debug",
        action="store_true",
        help="On failure, still return the results tree",
    )

    argparser.add_argument(
        "-i",
        "--interactive",
        action="store_true",
        help="Enter IPython (or Python) to explore source trees",
    )

    argparser.add_argument(
        "-s",
        "--summary",
        dest="output_summary",
        action="store_true",
        help="Summarize code inputs and outputs",
    )

    argparser.add_argument(
        "--json",
        dest="use_json",
        action="store_true",
        help="Output JSON representation only",
    )

    # This help text used to be a copy of the ``--summary`` one, which
    # described a different option entirely.
    argparser.add_argument(
        "--no-meta",
        action="store_false",
        dest="include_meta",
        help="Exclude meta information from the JSON output",
    )

    argparser.add_argument(
        "--filter",
        nargs="*",
        dest="filter_by_name",
        help="Filter items to parse by name",
    )

    return argparser
def summarize(
    parsed: Union[ParseResult, list[ParseResult]],
    squash: bool = True,
) -> CodeSummary:
    """
    Build a code summary from one or more ``ParseResult`` instances.

    ``squash`` is passed through to ``CodeSummary.from_parse_results``.
    """
    # Imported at call time; at module level the name is only imported
    # under ``typing.TYPE_CHECKING``.
    from .summary import CodeSummary

    code_summary = CodeSummary.from_parse_results(parsed, squash=squash)
    return code_summary
# Generic type variable: lets ``dump_json`` tie the type of ``obj`` to the
# ``type_`` it is serialized as.
T = TypeVar("T")
def dump_json(
    type_: Type[T],
    obj: T,
    include_meta: bool = True,
    indent: Optional[int] = 2,
) -> str:
    """
    Serialize ``obj`` as type ``type_`` with apischema and return it as a
    JSON string.

    Parameters
    ----------
    type_ : Type[T]
        The type of ``obj``.

    obj : T
        The object to serialize.

    include_meta : bool
        Include ``meta`` information in the dump.  When false, every
        ``"meta"`` key is removed from the serialized data.

    indent : int or None
        Make the JSON output prettier with indentation.

    Returns
    -------
    str

    Raises
    ------
    RuntimeError
        If the optional dependency apischema is not installed.
    """
    if apischema is None:
        raise RuntimeError(
            "Optional dependency apischema is required to output a JSON "
            "representation of source code."
        )

    payload = apischema.serialize(
        type_,
        obj,
        exclude_defaults=True,
        no_copy=True,
    )

    if include_meta:
        return json.dumps(payload, indent=indent)

    without_meta = util.recursively_remove_keys(payload, {"meta"})
    return json.dumps(without_meta, indent=indent)
def main(
    filename: Union[str, pathlib.Path],
    verbose: int = 0,
    debug: bool = False,
    interactive: bool = False,
    output_summary: bool = False,
    use_json: bool = False,
    print_filename: bool = False,
    print_source: bool = False,
    print_tree: bool = False,
    include_meta: bool = True,
    filter_by_name: Optional[list[str]] = None,
    input_format: Optional[str] = None,
) -> dict[str, list[ParseResult]]:
    """
    Parse the given source code/project.

    Parameters
    ----------
    filename : str or pathlib.Path
        The file to load and parse.
    verbose : int, optional
        Verbosity level.  Above 1, filenames, source code, parse trees and
        tracebacks are all printed.
    debug : bool, optional
        On parse failures, return the results instead of exiting with
        status 1.
    interactive : bool, optional
        Open a Python debug session on each failure and once at the end.
    output_summary : bool, optional
        Print a summary of all results after parsing instead of per-item
        output.
    use_json : bool, optional
        Print JSON.  Requires the optional dependency apischema.
    print_filename : bool, optional
        Print the filename and identifier of each parsed item.
    print_source : bool, optional
        Dump the source code of each item.
    print_tree : bool, optional
        Pretty-print the parse tree of each successfully parsed item.
    include_meta : bool, optional
        Include ``meta`` information in JSON output.
    filter_by_name : list of str, optional
        Only parse items for which one of these strings occurs, ignoring
        case, in the filename or in the item identifier.
    input_format : str, optional
        Passed to ``load_file_by_name``.

    Returns
    -------
    dict of str to list of ParseResult
        All results, keyed by ``str(filename)``; empty if nothing was parsed.

    Raises
    ------
    RuntimeError
        If ``use_json`` is set and apischema is not installed.
    SystemExit
        With status 1 if any item failed to parse and ``debug`` is false.
    """
    results_by_filename = {}
    filename = pathlib.Path(filename)

    # Filtering is meant to be case-insensitive, so *both* sides of the
    # substring test are lower-cased.  Lower-casing only the filter made any
    # filter containing an upper-case letter impossible to match.
    lowered_filters = [flt.lower() for flt in filter_by_name or []]
    filename_matches = any(flt in str(filename).lower() for flt in lowered_filters)

    if use_json:
        print_filename = False

        if apischema is None:
            raise RuntimeError(
                "Optional dependency apischema is required to output a JSON "
                "representation of source code."
            )

    print_filename = print_filename or verbose > 1
    print_source = print_source or verbose > 1
    print_tree = print_tree or verbose > 1
    print_tracebacks = verbose > 1

    def get_items():
        for item in load_file_by_name(filename, input_format):
            if lowered_filters:
                if filename_matches:
                    logger.debug(
                        "Included by filter (filename match): %s (%s)",
                        item.identifier,
                        type(item),
                    )
                elif any(flt in item.identifier.lower() for flt in lowered_filters):
                    logger.debug("Included by filter: %s", item.identifier)
                else:
                    logger.debug("Filtered out: %s (%s)", item.identifier, type(item))
                    continue

            yield from parse_item(item)

    all_results = []

    try:
        for index, res in enumerate(get_items(), start=1):
            all_results.append(res)
            # Every result is grouped under the path that was passed in.
            results_by_filename.setdefault(str(filename), []).append(res)
            if print_filename:
                print(f"[{index}] Parsing {res.filename}: {res.identifier} ({res.item.type})")

            if print_source:
                res.dump_source()

            if print_tree and res.tree is not None:
                print(res.tree.pretty())

            if res.exception is not None:
                tb = getattr(res.exception, "traceback", None)
                if print_tracebacks:
                    print(tb)
                print(
                    f"Failed to parse {res.filename} {res.identifier}: "
                    f"Exception: {type(res.exception).__name__}: {res.exception}"
                )
                if interactive:
                    util.python_debug_session(
                        namespace={"result": res},
                        message=(
                            f"Failed to parse {res.filename} {res.identifier}.\n"
                            f"Exception: {type(res.exception).__name__}: {res.exception}\n"
                            f"{tb}"
                        ),
                    )

            if output_summary:
                # Summary comes at the end
                continue

            # A failed parse has no tree, and ``transform()`` raises
            # ValueError for it.  Skip it here so the failure is reported
            # below (and reflected in the exit status) instead of crashing.
            if use_json and res.tree is not None:
                assert apischema is not None

                # We allow for StatementList to be parsed directly here, though
                # it's not acceptable as a top-level item in the grammar
                serialized = dump_json(
                    tf.ExtendedSourceCode,
                    res.transform(),
                    include_meta=include_meta,
                )
                print(serialized)
    except KeyboardInterrupt:
        print("\nCaught KeyboardInterrupt; stopping parsing.")

    summarized = None
    if output_summary:
        summarized = summarize(all_results)
        if use_json:
            from .summary import CodeSummary

            print(dump_json(CodeSummary, summarized, include_meta=include_meta))
        else:
            print(summarized)

    if not results_by_filename:
        return {}

    results = [res for items in results_by_filename.values() for res in items]
    failures = [res for res in results if res.exception is not None]

    if interactive:
        util.python_debug_session(
            namespace={
                "results": results,
                "by_filename": results_by_filename,
                "failures": failures,
                "summary": summarized,
            },
            message=(
                f"Saw {len(results_by_filename)} files with {len(results)} "
                f"total source code items.\n"
                f"There were {len(failures)} failures.\n"
                f"Results by filename are in ``by_filename``.\n"
                f"All results are also in a list ``results``.\n"
                f"Any failures are included in ``failures``.\n"
            ),
        )

    if failures:
        print("Failed to parse some source code files:")
        for failure in failures:
            header = f"{failure.filename}: {failure.identifier}"
            print(header)
            print("-" * len(header))
            print(f"({type(failure.exception).__name__}) {failure.exception}")
            print()
            # traceback.print_exc()

        if not debug:
            sys.exit(1)

    return results_by_filename