Skip to content

Commit

Permalink
parse multiple pragma strings in the same source code
Browse files Browse the repository at this point in the history
  • Loading branch information
taquangtrung committed Mar 6, 2023
1 parent 577eade commit 0ae6499
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 44 deletions.
26 changes: 26 additions & 0 deletions examples/multiple_pragmas.sol
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* @source: https://smartcontractsecurity.github.io/SWC-registry/docs/SWC-101#bectokensol
* @author: -
* @vulnerable_at_lines: 264
*/

pragma solidity >0.6.0 <0.8.0;

// Example contract exposing a single addition helper guarded against wrap-around.
// NOTE(review): `constant` as a function state-mutability modifier was disallowed
// in Solidity 0.5.0, so this would presumably not compile under the pragmas in this
// file — confirm whether that matters for a fixture that is only parsed for pragmas.
contract Contract1 {
// Returns a + b; the `require` below fails if the sum is smaller than `a`.
function add(uint256 a, uint256 b) internal constant returns (uint256) {
uint256 c = a + b;
// A sum smaller than an operand means the addition wrapped around.
require(c >= a);
return c;
}
}

pragma solidity ^0.7.0;

pragma solidity ^0.7.2;

// Example contract exposing a single subtraction helper guarded against underflow.
// NOTE(review): `constant` as a function state-mutability modifier was disallowed
// in Solidity 0.5.0, so this would presumably not compile under the pragmas in this
// file — confirm whether that matters for a fixture that is only parsed for pragmas.
contract Contract2 {
// Returns a - b; the `require` below fails when b is greater than a.
function sub(uint256 a, uint256 b) internal constant returns (uint256) {
// Reject b > a up front so the subtraction cannot go below zero.
require(b <= a);
return a - b;
}
}
6 changes: 6 additions & 0 deletions solc_detect/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
#!/usr/bin/env python3

# Third Party
import colored_traceback

from . import lib

# Enable coloring backtrace when printing to terminal
colored_traceback.add_hook()


def find_pragma_solc_version(input_file):
"""Find the Solidity version declared in pragma of a smart contract."""
Expand Down
4 changes: 2 additions & 2 deletions solc_detect/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,10 @@ def main():
input_file = args.input_file

pragma_version = lib.find_pragma_solc_version(input_file)
print("Input pragma version:", pragma_version)
print("Detected pragmas:", pragma_version)

best_version = lib.find_best_solc_version_for_pragma(pragma_version)
print("Best version:", best_version)
print("Best version:", best_version)

return best_version

Expand Down
9 changes: 5 additions & 4 deletions solc_detect/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,14 @@ def init_all_solidity_versions():

def find_pragma_solc_version(input_file):
"""Find the Solidity version declared in pragma of a smart contract."""
pragma_version = pragma_parser.parse_solidity_version(input_file)
return pragma_version
pragma_versions = pragma_parser.parse_solidity_version(input_file)
return pragma_versions


def find_best_solc_version_for_pragma(pragma_version):
def find_best_solc_version_for_pragma(pragma_versions):
"""Find the best version of Solc compiler for a pragma version."""
version_spec = NpmSpec(pragma_version)
combined_version = " ".join(pragma_versions)
version_spec = NpmSpec(combined_version)
all_versions = init_all_solidity_versions()
best_version = version_spec.select(all_versions)
return best_version
Expand Down
14 changes: 9 additions & 5 deletions solc_detect/pragma_grammar.lark
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
// Grammar to parse the `pragma` string of Solidity smart contracts.
//
// NOTE: use the prefix `_` to filter out rules/terminals from the parse tree.
// NOTE:
// - rule: `_rule` means inlining this rule into its containing rule.
// - terminal: `_TERM` means filtering this terminal out of the parse tree.

source_unit: pragma_version other_source_unit_elements*
source_unit: (solidity_pragma non_solidity_pragma?)*

pragma_version: _PRAGMA _SOLIDITY semantic_version _SEMICOLON
semantic_version: /[^;]+/
solidity_pragma: _PRAGMA _SOLIDITY pragma_version_info _SEMICOLON

other_source_unit_elements: /(.|\n|\r)+/s
pragma_version_info: /[^;]+/

// Rules and terminals cannot be used inside regular expression.
non_solidity_pragma: /(((?!pragma *solidity).)|\n|\r)+/s

_PRAGMA: "pragma"
_SOLIDITY: "solidity"
Expand Down
78 changes: 45 additions & 33 deletions solc_detect/pragma_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,86 +11,98 @@
import os
import sys
from dataclasses import dataclass
from typing import List

from lark import Lark, Transformer, ast_utils, v_args

########################################
# Constants
# Some constants

PARSER_DIR = os.path.dirname(__file__)
THIS_MODULE = sys.modules[__name__]

########################################
# AST capturing pragma version
# Source code and AST


@dataclass
class _AST(ast_utils.Ast):
# Use `_` in class name to be skipped by create_transformer()
pass
class AST(ast_utils.Ast):
"""Empty class represent an AST node"""


@dataclass
class PragmaVersion(_AST):
"""Class representing a pragma version"""
class SolidityPragma(AST):
"""Class representing a `pragma` version.
This class corresponds to the rule `solidity_pragma` when
`ast_utils.create_transformer(this_module, ...)` is called.
"""

version: str


@dataclass
class SourceUnit(_AST):
"""Class representing a source unit"""
class SourceUnit(AST):
"""Class representing a source unit.
pragma_version: PragmaVersion
other_source_unit_elements: str
This class corresponds to the rule `source_unit` when
`ast_utils.create_transformer(this_module, ...)` is called.
"""

solidity_pragmas: List[SolidityPragma]


########################################
# Other transformer to generate AST

@dataclass
class _DOT(_AST):
pass

class ASTTransformer(Transformer):
"""Class to define extra rules to transform the parsed tree to AST.
class ToAST(Transformer):
"""Class to transform the parsed tree to AST tree"""
These rules are the rules that do not correspond to any dataclass defined
above.
"""

@v_args(inline=True)
def source_unit(self, pragma, _other_source_unit_elements):
def source_unit(self, *elements):
"""Parse rule `source_unit`"""
return pragma
pragmas = [elem for elem in elements if elem is not None]
return SourceUnit(pragmas)

@v_args(inline=True)
def pragma_version(self, semver):
"""Parse rule `pragma_version`"""
return PragmaVersion(semver)
def solidity_pragma(self, version) -> SolidityPragma:
"""Parse rule `solidity_pragma`"""
return SolidityPragma(version)

@v_args(inline=True)
def semantic_version(self, semver):
"""Parse rule `semantic_version`"""
version = str(semver)
def pragma_version_info(self, version) -> str:
"""Parse rule `pragma_version_info`"""
version = str(version)
return version.strip()

@v_args(inline=True)
def other_source_unit_elements(self, other):
"""Parse rule `other_source_unit_elements`"""
return str(other)
# @v_args(inline=True)
def non_solidity_pragma(self, non_solidity_pragma):
"""Parse rule `non_solidity_pragma`"""
# Do not capture `non-solidity-pragma` elements
return None


########################################
# Grammar to capture pragma version
# Parse Solidity `pragma`


def parse_solidity_version(input_file):
"""Parse pragma string in a Solidity source code."""
"""Parse `pragma` string in a Solidity source code."""
# Read grammar file
grammar_file = os.path.join(PARSER_DIR, "pragma_grammar.lark")
with open(grammar_file, "r", encoding="utf-8") as file:
grammar = file.read()
parser = Lark(grammar, start="source_unit")
transformer = ast_utils.create_transformer(THIS_MODULE, ToAST())
# transformer = ast_utils.create_transformer(THIS_MODULE, ToAST())

# Read file to string
with open(input_file, "r", encoding="utf-8") as file:
content = file.read()
parsed_tree = parser.parse(content)
source_unit = transformer.transform(parsed_tree)
return source_unit.pragma_version.version
source_unit = ASTTransformer().transform(parsed_tree)
return [pragma.version for pragma in source_unit.solidity_pragmas]

0 comments on commit 0ae6499

Please sign in to comment.