From 0ae649975e00360b76502a071a2052d9b79ed915 Mon Sep 17 00:00:00 2001 From: Ta Quang Trung Date: Mon, 6 Mar 2023 21:20:25 +0800 Subject: [PATCH] parse multiple pragma strings in the same source code --- examples/multiple_pragmas.sol | 26 +++++++++++ solc_detect/__init__.py | 6 +++ solc_detect/__main__.py | 4 +- solc_detect/lib.py | 9 ++-- solc_detect/pragma_grammar.lark | 14 +++--- solc_detect/pragma_parser.py | 78 +++++++++++++++++++-------------- 6 files changed, 93 insertions(+), 44 deletions(-) create mode 100644 examples/multiple_pragmas.sol diff --git a/examples/multiple_pragmas.sol b/examples/multiple_pragmas.sol new file mode 100644 index 0000000..4f46847 --- /dev/null +++ b/examples/multiple_pragmas.sol @@ -0,0 +1,26 @@ +/* + * @source: https://smartcontractsecurity.github.io/SWC-registry/docs/SWC-101#bectokensol + * @author: - + * @vulnerable_at_lines: 264 + */ + +pragma solidity >0.6.0 <0.8.0; + +contract Contract1 { + function add(uint256 a, uint256 b) internal constant returns (uint256) { + uint256 c = a + b; + require(c >= a); + return c; + } +} + +pragma solidity ^0.7.0; + +pragma solidity ^0.7.2; + +contract Contract2 { + function sub(uint256 a, uint256 b) internal constant returns (uint256) { + require(b <= a); + return a - b; + } +} diff --git a/solc_detect/__init__.py b/solc_detect/__init__.py index 156fea7..69e857d 100644 --- a/solc_detect/__init__.py +++ b/solc_detect/__init__.py @@ -1,7 +1,13 @@ #!/usr/bin/env python3 +# Third Party +import colored_traceback + from . import lib +# Enable coloring backtrace when printing to terminal +colored_traceback.add_hook() + def find_pragma_solc_version(input_file): """Find the Solidity version declared in pragma of a smart contract.""" diff --git a/solc_detect/__main__.py b/solc_detect/__main__.py index ecb38a1..3ced318 100644 --- a/solc_detect/__main__.py +++ b/solc_detect/__main__.py @@ -55,10 +55,10 @@ def main(): input_file = args.input_file pragma_version = lib.find_pragma_solc_version(input_file) - print("Input pragma version:", pragma_version) + print("Detected pragmas:", pragma_version) best_version = lib.find_best_solc_version_for_pragma(pragma_version) - print("Best version:", best_version) + print("Best version:", best_version) return best_version diff --git a/solc_detect/lib.py b/solc_detect/lib.py index d333210..194c940 100644 --- a/solc_detect/lib.py +++ b/solc_detect/lib.py @@ -31,13 +31,14 @@ def init_all_solidity_versions(): def find_pragma_solc_version(input_file): """Find the Solidity version declared in pragma of a smart contract.""" - pragma_version = pragma_parser.parse_solidity_version(input_file) - return pragma_version + pragma_versions = pragma_parser.parse_solidity_version(input_file) + return pragma_versions -def find_best_solc_version_for_pragma(pragma_version): +def find_best_solc_version_for_pragma(pragma_versions): """Find the best version of Solc compiler for a pragma version.""" - version_spec = NpmSpec(pragma_version) + combined_version = " ".join(pragma_versions) + version_spec = NpmSpec(combined_version) all_versions = init_all_solidity_versions() best_version = version_spec.select(all_versions) return best_version diff --git a/solc_detect/pragma_grammar.lark b/solc_detect/pragma_grammar.lark index 5efb397..981923e 100644 --- a/solc_detect/pragma_grammar.lark +++ b/solc_detect/pragma_grammar.lark @@ -1,13 +1,17 @@ // Grammar to parse the `pragma` string of Solidity smart contracts. // -// NOTE: use the prefix `_` to filter out rules/teminals from the parse tree. +// NOTE: +// - rule: `_rule` means inlining this rule into in their containing rule. +// - terminal: `_TERM` means filter out this terminal -source_unit: pragma_version other_source_unit_elements* +source_unit: (solidity_pragma non_solidity_pragma?)* -pragma_version: _PRAGMA _SOLIDITY semantic_version _SEMICOLON -semantic_version: /[^;]+/ +solidity_pragma: _PRAGMA _SOLIDITY pragma_version_info _SEMICOLON -other_source_unit_elements: /(.|\n|\r)+/s +pragma_version_info: /[^;]+/ + +// Rules and terminals cannot be used inside regular expression. +non_solidity_pragma: /(((?!pragma *solidity).)|\n|\r)+/s _PRAGMA: "pragma" _SOLIDITY: "solidity" diff --git a/solc_detect/pragma_parser.py b/solc_detect/pragma_parser.py index 4f2e10d..b8bee54 100644 --- a/solc_detect/pragma_parser.py +++ b/solc_detect/pragma_parser.py @@ -11,86 +11,98 @@ import os import sys from dataclasses import dataclass +from typing import List from lark import Lark, Transformer, ast_utils, v_args ######################################## -# Constants +# Some constants PARSER_DIR = os.path.dirname(__file__) THIS_MODULE = sys.modules[__name__] ######################################## -# AST capturing pragma version +# Source code and AST @dataclass -class _AST(ast_utils.Ast): - # Use `_` in class name to be skipped by create_transformer() - pass +class AST(ast_utils.Ast): + """Empty class represent an AST node""" @dataclass -class PragmaVersion(_AST): - """Class representing a pragma version""" +class SolidityPragma(AST): + """Class representing a `pragma` version. + + This class corresponds to the rule `solidity_grammar` when + `ast_utils.create_transformer(this_module, ...)` is called. + """ version: str @dataclass -class SourceUnit(_AST): - """Class representing a source unit""" +class SourceUnit(AST): + """Class representing a source unit. - pragma_version: PragmaVersion - other_source_unit_elements: str + This class corresponds to the rule `source_unit` when + `ast_utils.create_transformer(this_module, ...)`. + """ + solidity_pragmas: List[SolidityPragma] + + +######################################## +# Other transformer to generate AST -@dataclass -class _DOT(_AST): - pass +class ASTTransformer(Transformer): + """Class to define extra rules to transform the parsed tree to AST. -class ToAST(Transformer): - """Class to transform the parsed tree to AST tree""" + These rules are the rules that do not correspond to any dataclass defined + above. + """ @v_args(inline=True) - def source_unit(self, pragma, _other_source_unit_elements): + def source_unit(self, *elements): """Parse rule `source_unit`""" - return pragma + pragmas = [elem for elem in elements if elem is not None] + return SourceUnit(pragmas) @v_args(inline=True) - def pragma_version(self, semver): - """Parse rule `pragma_version`""" - return PragmaVersion(semver) + def solidity_pragma(self, version) -> SolidityPragma: + """Parse rule `solidity_pragma`""" + return SolidityPragma(version) @v_args(inline=True) - def semantic_version(self, semver): - """Parse rule `semantic_version`""" - version = str(semver) + def pragma_version_info(self, version) -> str: + """Parse rule `pragma_version_info`""" + version = str(version) return version.strip() - @v_args(inline=True) - def other_source_unit_elements(self, other): - """Parse rule `other_source_unit_elements`""" - return str(other) + # @v_args(inline=True) + def non_solidity_pragma(self, non_solidity_pragma): + """Parse rule `non_solidity_pragma`""" + # Do not capture `non-solidity-pragma` elements + return None ######################################## -# Grammar to capture pragma version +# Parse Solidity `pragma` def parse_solidity_version(input_file): - """Parse pragma string in a Solidity source code.""" + """Parse `pragma` string in a Solidity source code.""" # Read grammar file grammar_file = os.path.join(PARSER_DIR, "pragma_grammar.lark") with open(grammar_file, "r", encoding="utf-8") as file: grammar = file.read() parser = Lark(grammar, start="source_unit") - transformer = ast_utils.create_transformer(THIS_MODULE, ToAST()) + # transformer = ast_utils.create_transformer(THIS_MODULE, ToAST()) # Read file to string with open(input_file, "r", encoding="utf-8") as file: content = file.read() parsed_tree = parser.parse(content) - source_unit = transformer.transform(parsed_tree) - return source_unit.pragma_version.version + source_unit = ASTTransformer().transform(parsed_tree) + return [pragma.version for pragma in source_unit.solidity_pragmas]