From ae1ae1a1e9fedbad841d6128dac9e03406b54e78 Mon Sep 17 00:00:00 2001 From: Elliot Chance Date: Fri, 24 Mar 2017 08:19:47 +1100 Subject: [PATCH 1/5] Added a new script for converting clang output AST to JSON --- .gitignore | 1 + ast2json.py | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 ast2json.py diff --git a/.gitignore b/.gitignore index 99284e498..835db24c1 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ /pp.c /out.go /a.out +/pp.json diff --git a/ast2json.py b/ast2json.py new file mode 100644 index 000000000..5aa3bd625 --- /dev/null +++ b/ast2json.py @@ -0,0 +1,142 @@ +import sys +import re +import json + +# This script converts the output of clang AST into a JSON file. +# +# Usage: +# clang -Xclang -ast-dump -fsyntax-only myfile.c | python ast2json.py +# +# Yes, there are many better ways to do this. However I chose this method +# because: +# +# 1. I need to separate the clang AST from the c2go conversion process so that +# the c2go program can ingest a reliable JSON file and not depend on clang or +# its different versions at all. +# 2. The clang API is not stable and trying to match up binaries with different +# versions and operating systems can be tricky and brittle. +# 3. This tool, in time, will become a better binary of some kind that produces +# much the same JSON output (so minimal changes to c2go.py). +# 4. I needed something quick and dirty to prove the complete toolchain and get +# it working on different versions of clang and different operating systems +# before we have enough information to really standardise the process. + +regex = { + 'AlwaysInlineAttr': r"^ (?P
[0-9a-fx]+) <(?P.*)> always_inline", + 'AsmLabelAttr': r"^ (?P
[0-9a-fx]+) <(?P.*)> \"(?P.+)\"", + 'AvailabilityAttr': r"^ (?P
[0-9a-fx]+) <(?P.*)> (?P\w+) (?P[\d.]+) (?P[\d.]+) (?P[\d.]+) (?P\".*?\"|\w+) (?P\".*?\"|\w+)", + 'TranslationUnitDecl': r'^ (?P
[0-9a-fx]+)', + 'IntegerLiteral': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*)\' (?P\d+)', + 'TypedefDecl': r'^ (?P
[0-9a-fx]+) <(?P.+?)> (?P|[^ ]+)(?P.*?) (?P\w+) \'(?P.*?)\'(?P:\'.*?\')?', + 'BuiltinType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', + 'ReturnStmt': r'^ (?P
[0-9a-fx]+) <(?P.*)>', + 'StringLiteral': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*)\' (?P.*)', + 'ImplicitCastExpr': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*)\' <(?P.*)>', + 'DeclRefExpr': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*?)\' (?P.*)', + 'CallExpr': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*?)\'', + 'ParenExpr': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*?)\'', + 'CompoundStmt': r'^ (?P
[0-9a-fx]+) <(?P.*)>', + 'IfStmt': r'^ (?P
[0-9a-fx]+) <(?P.*)>', + 'FunctionDecl': r'^ (?P
[0-9a-fx]+) (?Pprev [0-9a-fx]+)? ?<(?P.*)> (?P[^ ]+)(?P implicit)?(?P used)? (?P\w+) \'(?P.*)\'(?P extern)?', + 'ParmVarDecl': r'^ (?P
[0-9a-fx]+) <(?P.*)> (?P.+) \'(?P.*?)\'(?P:\'.*?\')?', + 'FormatAttr': r'^ (?P
[0-9a-fx]+) <(?P.*)>(?P Implicit)? (?P\w+) (?P\d+) (?P\d+)', + 'RecordType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', + 'Record': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', + 'PointerType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', + 'Typedef': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', + 'ConstantArrayType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\' (?P\d+)', + 'RecordDecl': r'^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.+)', + 'FieldDecl': r'^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.+)', + 'ElaboratedType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\' (?P.+)', + 'TypedefType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\' (?P.+)', + 'VarDecl': r"^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.+) '(?P.+)'(?P.*)", + 'DeprecatedAttr': r"^ (?P
[0-9a-fx]+) <(?P.*)> \"(?P.*?)\" \"(?P.*?)\"", + 'BinaryOperator': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' '(?P.*?)'", + 'MemberExpr': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' lvalue ->(?P\w+) (?P[0-9a-fx]+)", + 'CStyleCastExpr': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' <(?P.*)>", + 'CharacterLiteral': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' (?P\d+)", + 'UnaryOperator': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)'(?P lvalue)?(?P prefix)?(?P postfix)? '(?P.*?)'", + 'DeclStmt': r"^ (?P
[0-9a-fx]+) <(?P.*)>", + 'ForStmt': r"^ (?P
[0-9a-fx]+) <(?P.*)>", + 'BreakStmt': r"^ (?P
[0-9a-fx]+) <(?P.*)>", +} + +def build_tree(nodes, depth): + """Convert an array of nodes, each prefixed with a depth into a tree.""" + if len(nodes) == 0: + return [] + + # Split the list into sections, treat each section as a tree with its own + # root. + sections = [] + for node in nodes: + if node[0] == depth: + sections.append([node]) + else: + sections[-1].append(node) + + results = [] + for section in sections: + children = build_tree([n for n in section if n[0] > depth], depth + 1) + result = section[0][1] + + if len(children) > 0: + result['children'] = children + + results.append(result) + + return results + +def read_ast(): + stdin = sys.stdin.read() + uncolored = re.sub(r'\x1b\[[\d;]+m', '', stdin) + return uncolored.split("\n") + +def convert_lines_to_nodes(lines): + nodes = [] + for line in lines: + if line.strip() == '': + continue + + # This will need to be handled more gracefully... I'm not even sure + # what this means? + if '<<>>' in line: + continue + + indent_and_type = re.search(r'^([|\- `]*)(\w+)', line) + if indent_and_type is None: + print("Can not understand line '%s'" % line) + sys.exit(1) + + offset = len(indent_and_type.group(0)) + try: + result = re.search(regex[indent_and_type.group(2)], line[offset:]) + except KeyError: + print("There is no regex for '%s'." 
% indent_and_type.group(2)) + print("I will print out all the lines so a regex can be created:\n") + + for line in lines: + s = re.search(r'^([|\- `]*)(\w+)', line) + if s is not None and indent_and_type.group(2) == s.group(2): + print(line[offset:]) + + sys.exit(1) + + if result is None: + print("Can not understand line '%s'" % line) + sys.exit(1) + + node = result.groupdict() + + node['node'] = indent_and_type.group(2) + + indent_level = len(indent_and_type.group(1)) / 2 + nodes.append([indent_level, node]) + + return nodes + +lines = read_ast() +nodes = convert_lines_to_nodes(lines) +tree = build_tree(nodes, 0) + +print(json.dumps(tree, sort_keys=True, indent=2, separators=(',', ': '))) From ddcca04fd7b8b26e7497636978e0edd2ff5f379d Mon Sep 17 00:00:00 2001 From: Elliot Chance Date: Fri, 24 Mar 2017 08:21:20 +1100 Subject: [PATCH 2/5] Some refactoring of c2go to take in the new JSON --- c2go.py | 334 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 181 insertions(+), 153 deletions(-) diff --git a/c2go.py b/c2go.py index 47ed3c595..a2180c452 100644 --- a/c2go.py +++ b/c2go.py @@ -1,9 +1,9 @@ import sys -import clang.cindex import pprint import re import subprocess import StringIO +import json function_defs = { '__istype': ('uint32', ('__darwin_ct_rune_t', 'uint32')), @@ -127,7 +127,7 @@ def print_line(out, line, indent): out.write('%s%s\n' % ('\t' * indent, line)) def render_expression(node): - if node.kind.name == 'BINARY_OPERATOR': + if node['node'] == 'BINARY_OPERATOR': end_of_left = list(node.get_children())[0].extent.end.column operator = None for t in node.get_tokens(): @@ -143,14 +143,14 @@ def render_expression(node): return '%s %s %s' % (left, operator, right), return_type - if node.kind.name == 'CONDITIONAL_OPERATOR': + if node['node'] == 'CONDITIONAL_OPERATOR': a, b, c = [render_expression(t) for t in list(node.get_children())] try: return '__ternary(%s, %s, %s)' % (cast(a[0], 'bool'), b[0], c[0]), b[1] except TypeError: return 
'// CONDITIONAL_OPERATOR: %s' % ''.join([t.spelling for t in node.get_tokens()]), 'unknown' - if node.kind.name == 'UNARY_OPERATOR': + if node['node'] == 'UNARY_OPERATOR': # print(children[2].kind.name) expr_start = list(node.get_children())[0].extent.start.column @@ -183,7 +183,7 @@ def render_expression(node): return '%s%s' % (operator, expr[0]), expr[1] - if node.kind.name == 'UNEXPOSED_EXPR': + if node['node'] == 'UNEXPOSED_EXPR': children = list(node.get_children()) if len(children) < 1: return '// UNEXPOSED_EXPR: %s' % ''.join([t.spelling for t in node.get_tokens()]), 'unknown' @@ -203,25 +203,28 @@ def render_expression(node): return name, e[1] - if node.kind.name in ('CHARACTER_LITERAL', 'STRING_LITERAL', 'FLOATING_LITERAL'): + if node['node'] in ('CHARACTER_LITERAL', 'STRING_LITERAL', 'FLOATING_LITERAL'): return list(node.get_tokens())[0].spelling, 'const char*' - if node.kind.name == 'INTEGER_LITERAL': + if node['node'] == 'INTEGER_LITERAL': literal = list(node.get_tokens())[0].spelling if literal[-1] == 'L': literal = '%s(%s)' % (resolve_type('long'), literal[:-1]) return literal, 'int' - if node.kind.name == 'PAREN_EXPR': + if node['node'] == 'PAREN_EXPR': e = render_expression(list(node.get_children())[0]) return '(%s)' % e[0], e[1] - if node.kind.name == 'DECL_REF_EXPR': - return node.spelling, node.type.spelling + if node['node'] == 'DeclRefExpr': + return node['unknown'], node['type'] - if node.kind.name == 'CALL_EXPR': - children = list(node.get_children()) + if node['node'] == 'ImplicitCastExpr': + return render_expression(node['children'][0]) + + if node['node'] == 'CallExpr': + children = node['children'] func_name = render_expression(children[0])[0] func_def = function_defs[func_name] @@ -245,25 +248,25 @@ def render_expression(node): return '%s(%s)' % (func_name, ', '.join(args)), func_def[0] - if node.kind.name == 'ARRAY_SUBSCRIPT_EXPR': + if node['node'] == 'ARRAY_SUBSCRIPT_EXPR': children = list(node.get_children()) return '%s[%s]' % 
(render_expression(children[0])[0], render_expression(children[1])[0]), 'unknown' - if node.kind.name == 'MEMBER_REF_EXPR': + if node['node'] == 'MEMBER_REF_EXPR': children = list(node.get_children()) return '%s.%s' % (render_expression(children[0])[0], list(node.get_tokens())[-2].spelling), 'unknown' - if node.kind.name == 'CSTYLE_CAST_EXPR': + if node['node'] == 'CSTYLE_CAST_EXPR': children = list(node.get_children()) return render_expression(children[0]), 'unknown' - if node.kind.name == 'FIELD_DECL' or node.kind.name == 'VAR_DECL': + if node['node'] == 'FIELD_DECL' or node['node'] == 'VAR_DECL': type = resolve_type(node.type.spelling) name = node.spelling prefix = '' - if node.kind.name == 'VAR_DECL': + if node['node'] == 'VAR_DECL': prefix = 'var ' suffix = '' @@ -277,10 +280,10 @@ def render_expression(node): return '%s%s %s%s' % (prefix, name, type, suffix), 'unknown' - if node.kind.name == 'PARM_DECL': + if node['node'] == 'PARM_DECL': return resolve_type(node.type.spelling), 'unknown' - return node.kind.name, 'unknown' + return node['node'], 'unknown' #raise Exception('render_expression: %s' % node.kind) @@ -289,29 +292,38 @@ def print_children(node): for child in node.get_children(): print(child.kind.name, render_expression(child), [t.spelling for t in child.get_tokens()]) +def get_function_params(nodes): + if 'children' not in nodes: + return [] + + return [n for n in nodes['children'] if n['node'] == 'ParmVarDecl'] + def render(out, node, indent=0, return_type=None): - if node.kind.name == 'TRANSLATION_UNIT': - for c in node.get_children(): + if node['node'] == 'TranslationUnitDecl': + for c in node['children']: render(out, c, indent, return_type) return - if node.kind.name == 'FUNCTION_DECL': - function_name = node.spelling + if node['node'] == 'FunctionDecl': + function_name = node['name'] if function_name in ('__istype', '__isctype', '__wcwidth', '__sputc'): return has_body = False - for c in node.get_children(): - if c.kind.name == 'COMPOUND_STMT': 
- has_body = True + if 'children' in node: + for c in node['children']: + if c['node'] == 'CompoundStmt': + has_body = True + # print(function_name) + # print(json.dumps(node['children'])) args = [] - for a in node.get_arguments(): - args.append('%s %s' % (a.spelling, resolve_type(a.type.spelling))) + # for a in get_function_params(node): + # args.append('%s %s' % (a['name'], resolve_type(a['type']))) if has_body: - return_type = ' ' + node.result_type.spelling + return_type = ' ' + node['type'] if return_type == ' void': return_type = '' @@ -321,82 +333,82 @@ def render(out, node, indent=0, return_type=None): print_line(out, 'func %s(%s)%s {' % (function_name, ', '.join(args), return_type), indent) - for c in node.get_children(): - if c.kind.name == 'COMPOUND_STMT': - render(out, c, indent + 1, node.result_type.spelling) + for c in node['children']: + if c['node'] == 'CompoundStmt': + render(out, c, indent + 1, node['type']) print_line(out, '}\n', indent) - function_defs[node.spelling] = (node.result_type.spelling, [a.type.spelling for a in node.get_arguments()]) + # function_defs[node.spelling] = (node.result_type.spelling, [a.type.spelling for a in node.get_arguments()]) return - if node.kind.name == 'PARM_DECL': - print_line(out, node.spelling, indent) - return + # if node['node'] == 'PARM_DECL': + # print_line(out, node.spelling, indent) + # return - if node.kind.name == 'COMPOUND_STMT': - for c in node.get_children(): + if node['node'] == 'CompoundStmt': + for c in node['children']: render(out, c, indent, return_type) return - if node.kind.name == 'IF_STMT': - children = list(node.get_children()) + # if node['node'] == 'IF_STMT': + # children = list(node.get_children()) - e = render_expression(children[0]) - print_line(out, 'if %s {' % cast(e[0], e[1], 'bool'), indent) + # e = render_expression(children[0]) + # print_line(out, 'if %s {' % cast(e[0], e[1], 'bool'), indent) - render(out, children[1], indent + 1, return_type) + # render(out, children[1], indent 
+ 1, return_type) - if len(children) > 2: - print_line(out, '} else {', indent) - render(out, children[2], indent + 1, return_type) + # if len(children) > 2: + # print_line(out, '} else {', indent) + # render(out, children[2], indent + 1, return_type) - print_line(out, '}', indent) + # print_line(out, '}', indent) - return + # return - if node.kind.name == 'WHILE_STMT': - children = list(node.get_children()) + # if node['node'] == 'WHILE_STMT': + # children = list(node.get_children()) - e = render_expression(children[0]) - print_line(out, 'for %s {' % cast(e[0], e[1], 'bool'), indent) + # e = render_expression(children[0]) + # print_line(out, 'for %s {' % cast(e[0], e[1], 'bool'), indent) - render(out, children[1], indent + 1, return_type) + # render(out, children[1], indent + 1, return_type) - print_line(out, '}', indent) + # print_line(out, '}', indent) - return + # return - if node.kind.name == 'FOR_STMT': - children = list(node.get_children()) + # if node['node'] == 'FOR_STMT': + # children = list(node.get_children()) - a, b, c = [render_expression(e)[0] for e in children[:3]] - print_line(out, 'for %s; %s; %s {' % (a, b, c), indent) + # a, b, c = [render_expression(e)[0] for e in children[:3]] + # print_line(out, 'for %s; %s; %s {' % (a, b, c), indent) - render(out, children[3], indent + 1, return_type) + # render(out, children[3], indent + 1, return_type) - print_line(out, '}', indent) + # print_line(out, '}', indent) - return + # return - if node.kind.name == 'BREAK_STMT': - print_line(out, 'break', indent) - return + # if node['node'] == 'BREAK_STMT': + # print_line(out, 'break', indent) + # return - if node.kind.name == 'UNARY_OPERATOR': - variable, operator = [t.spelling for t in list(node.get_tokens())[0:2]] - if operator == '++': - print_line(out, '%s += 1' % variable, indent) - #print_line(out, '%s = string(%s[1:])' % (variable, variable), indent) - return + # if node['node'] == 'UNARY_OPERATOR': + # variable, operator = [t.spelling for t in 
list(node.get_tokens())[0:2]] + # if operator == '++': + # print_line(out, '%s += 1' % variable, indent) + # #print_line(out, '%s = string(%s[1:])' % (variable, variable), indent) + # return - print_line(out, '%s%s' % (operator, variable), indent) - return + # print_line(out, '%s%s' % (operator, variable), indent) + # return - #raise Exception('UNARY_OPERATOR: %s' % operator) + # #raise Exception('UNARY_OPERATOR: %s' % operator) - if node.kind.name == 'RETURN_STMT': + if node['node'] == 'ReturnStmt': # try: # e = render_expression(list(node.get_children())[0]) # print_line(out, 'return %s' % cast(e[0], e[1], return_type), indent) @@ -405,11 +417,15 @@ def render(out, node, indent=0, return_type=None): return - if node.kind.name in ('BINARY_OPERATOR', 'INTEGER_LITERAL', 'CALL_EXPR'): + if node['node'] in ('BINARY_OPERATOR', 'INTEGER_LITERAL', 'CallExpr'): print_line(out, render_expression(node)[0], indent) return - if node.kind.name == 'TYPEDEF_DECL': + if node['node'] == 'TypedefDecl': + print_line(out, "type %s %s\n" % (node['type'], node['name']), indent) + # print(node) + return + tokens = [t.spelling for t in node.get_tokens()] if len(list(node.get_children())) == 0: print_line(out, "type %s %s\n" % (tokens[-2], resolve_type(' '.join(tokens[1:-2]))), indent) @@ -418,67 +434,70 @@ def render(out, node, indent=0, return_type=None): return - if node.kind.name == 'UNION_DECL' or node.kind.name == 'STRUCT_DECL': - tokens = [t.spelling for t in node.get_tokens()] - - struct_name = tokens[-1] - start_at = 2 - if struct_name == ';': - struct_name = tokens[1] - start_at = 3 - - if struct_name in ('__darwin_pthread_handler_rec', '_opaque_pthread_t', - '_RuneEntry', '_RuneRange', '_RuneCharClass', '_RuneLocale'): - return - - print_line(out, "type %s struct {" % struct_name, indent) - - for attribute in node.get_children(): - print_line(out, render_expression(attribute)[0], indent + 1) - # print(struct_name, render_expression(attribute)) - - # name = '' - # type = '' - # 
for token in tokens[start_at:-2]: - # if token == ';': - # print_line(out, '%s %s' % (name, resolve_type(type)), indent + 1) - # type = '' - # elif is_identifier(token): - # name = token - # else: - # type += ' ' + token - - print_line(out, "}\n", indent) - return - - if node.kind.name == 'UNEXPOSED_DECL': - tokens = [t.spelling for t in node.get_tokens()] - print_line(out, '// ' + ' '.join(tokens[1:-2]), indent) + if node['node'] == 'RecordDecl': return - if node.kind.name == 'DECL_STMT': - for child in node.get_children(): - print_line(out, render_expression(child)[0], indent) - return - - if node.kind.name == 'VAR_DECL': - tokens = [t.spelling for t in node.get_tokens()] - if tokens[0] == 'extern': - return - - children = list(node.get_children()) - if len(children) > 0: - print_line(out, 'var %s %s = %s\n' % (tokens[2], tokens[1], render_expression(children[0])[0]), indent) - else: - print_line(out, 'var %s %s\n' % (tokens[2], tokens[1]), indent) + #if node['node'] == 'UNION_DECL' or node['node'] == 'STRUCT_DECL': + # tokens = [t.spelling for t in node.get_tokens()] + + # struct_name = tokens[-1] + # start_at = 2 + # if struct_name == ';': + # struct_name = tokens[1] + # start_at = 3 + + # if struct_name in ('__darwin_pthread_handler_rec', '_opaque_pthread_t', + # '_RuneEntry', '_RuneRange', '_RuneCharClass', '_RuneLocale'): + # return + + # print_line(out, "type %s struct {" % struct_name, indent) + + # for attribute in node.get_children(): + # print_line(out, render_expression(attribute)[0], indent + 1) + # # print(struct_name, render_expression(attribute)) + + # # name = '' + # # type = '' + # # for token in tokens[start_at:-2]: + # # if token == ';': + # # print_line(out, '%s %s' % (name, resolve_type(type)), indent + 1) + # # type = '' + # # elif is_identifier(token): + # # name = token + # # else: + # # type += ' ' + token + + # print_line(out, "}\n", indent) + # return + + # if node['node'] == 'UNEXPOSED_DECL': + # tokens = [t.spelling for t in 
node.get_tokens()] + # print_line(out, '// ' + ' '.join(tokens[1:-2]), indent) + # return + + # if node['node'] == 'DECL_STMT': + # for child in node.get_children(): + # print_line(out, render_expression(child)[0], indent) + # return + + if node['node'] == 'VarDecl': + # tokens = [t.spelling for t in node.get_tokens()] + # if tokens[0] == 'extern': + # return + + # children = list(node.get_children()) + # if len(children) > 0: + # print_line(out, 'var %s %s = %s\n' % (tokens[2], tokens[1], render_expression(children[0])[0]), indent) + # else: + # print_line(out, 'var %s %s\n' % (tokens[2], tokens[1]), indent) return - if node.kind.name == 'ENUM_DECL': - print_line(out, '// enum', indent) - return + # if node['node'] == 'ENUM_DECL': + # print_line(out, '// enum', indent) + # return - raise Exception(node.kind) + raise Exception(node['node']) # 1. Compile it first (checking for errors) c_file_path = sys.argv[1] @@ -491,24 +510,33 @@ def render(out, node, indent=0, return_type=None): with open(pp_file_path, 'w') as pp_out: pp_out.write(pp) -# 3. Parse C and output Go -index = clang.cindex.Index.create() -tu = index.parse(pp_file_path) - -go_file_path = '%s.go' % c_file_path.split('/')[-1][:-2] -# go_out = sys.stdout -go_out = StringIO.StringIO() -#with open(go_file_path, 'w') as go_out: -# print_line(go_out, "package main\n", 0) -#print_line(go_out, 'import ("fmt"; "os")\n', 0) -render(go_out, tu.cursor) - -print("package main\n") -print("import (") -for import_name in sorted(imports): - print('\t"%s"' % import_name) -print(")\n") -print(go_out.getvalue()) - -# 4. Compile the Go -#subprocess.call(["go", "run", "functions.go", go_file_path]) +# 3. 
Generate JSON from AST +ast_pp = subprocess.Popen(["clang", "-Xclang", "-ast-dump", "-fsyntax-only", pp_file_path], stdout=subprocess.PIPE) +pp = subprocess.Popen(["python", "ast2json.py"], stdin=ast_pp.stdout, stdout=subprocess.PIPE).communicate()[0] + +json_file_path = 'pp.json' +with open(json_file_path, 'w') as json_out: + json_out.write(pp) + +with open(json_file_path, 'r') as json_in: + # 3. Parse C and output Go + # index = clang.cindex.Index.create() + # tu = index.parse(pp_file_path) + + go_file_path = '%s.go' % c_file_path.split('/')[-1][:-2] + # go_out = sys.stdout + go_out = StringIO.StringIO() + #with open(go_file_path, 'w') as go_out: + # print_line(go_out, "package main\n", 0) + #print_line(go_out, 'import ("fmt"; "os")\n', 0) + render(go_out, json.loads(json_in.read())[0]) + + print("package main\n") + print("import (") + for import_name in sorted(imports): + print('\t"%s"' % import_name) + print(")\n") + print(go_out.getvalue()) + + # 4. Compile the Go + #subprocess.call(["go", "run", "functions.go", go_file_path]) From ca3c8568fe89703ecc5de9a3df957a32177d8704 Mon Sep 17 00:00:00 2001 From: Elliot Chance Date: Fri, 24 Mar 2017 10:32:48 +1100 Subject: [PATCH 3/5] hello-world.c is now working with the new JSON --- ast2json.py | 62 ++++++++++++++++++++++++++++------------------------- c2go.py | 21 ++++++++---------- 2 files changed, 42 insertions(+), 41 deletions(-) diff --git a/ast2json.py b/ast2json.py index 5aa3bd625..bebaf8add 100644 --- a/ast2json.py +++ b/ast2json.py @@ -25,40 +25,40 @@ 'AlwaysInlineAttr': r"^ (?P
[0-9a-fx]+) <(?P.*)> always_inline", 'AsmLabelAttr': r"^ (?P
[0-9a-fx]+) <(?P.*)> \"(?P.+)\"", 'AvailabilityAttr': r"^ (?P
[0-9a-fx]+) <(?P.*)> (?P\w+) (?P[\d.]+) (?P[\d.]+) (?P[\d.]+) (?P\".*?\"|\w+) (?P\".*?\"|\w+)", - 'TranslationUnitDecl': r'^ (?P
[0-9a-fx]+)', - 'IntegerLiteral': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*)\' (?P\d+)', - 'TypedefDecl': r'^ (?P
[0-9a-fx]+) <(?P.+?)> (?P|[^ ]+)(?P.*?) (?P\w+) \'(?P.*?)\'(?P:\'.*?\')?', + 'BinaryOperator': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' '(?P.*?)'", + 'BreakStmt': r"^ (?P
[0-9a-fx]+) <(?P.*)>", 'BuiltinType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', - 'ReturnStmt': r'^ (?P
[0-9a-fx]+) <(?P.*)>', - 'StringLiteral': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*)\' (?P.*)', - 'ImplicitCastExpr': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*)\' <(?P.*)>', - 'DeclRefExpr': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*?)\' (?P.*)', 'CallExpr': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*?)\'', - 'ParenExpr': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*?)\'', + 'CharacterLiteral': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' (?P\d+)", 'CompoundStmt': r'^ (?P
[0-9a-fx]+) <(?P.*)>', + 'ConstantArrayType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\' (?P\d+)', + 'CStyleCastExpr': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' <(?P.*)>", + 'DeclRefExpr': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' (lvalue (?P\w+)|Function) (?P[0-9a-fx]+) '(?P.*?)' '(?P.*?)'", + 'DeclStmt': r"^ (?P
[0-9a-fx]+) <(?P.*)>", + 'DeprecatedAttr': r"^ (?P
[0-9a-fx]+) <(?P.*)> \"(?P.*?)\" \"(?P.*?)\"", + 'ElaboratedType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\' (?P.+)', + 'FieldDecl': r'^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.+)', + 'FormatAttr': r'^ (?P
[0-9a-fx]+) <(?P.*)>(?P Implicit)? (?P\w+) (?P\d+) (?P\d+)', + 'ForStmt': r"^ (?P
[0-9a-fx]+) <(?P.*)>", + 'FunctionDecl': r"^ (?P
[0-9a-fx]+) (?Pprev [0-9a-fx]+)? ?<(?P.*)> (?P[^ ]+)(?P implicit)?(?P used)? (?P\w+) '(?P.*)'(?P extern)?", 'IfStmt': r'^ (?P
[0-9a-fx]+) <(?P.*)>', - 'FunctionDecl': r'^ (?P
[0-9a-fx]+) (?Pprev [0-9a-fx]+)? ?<(?P.*)> (?P[^ ]+)(?P implicit)?(?P used)? (?P\w+) \'(?P.*)\'(?P extern)?', + 'ImplicitCastExpr': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*)\' <(?P.*)>', + 'IntegerLiteral': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*)\' (?P\d+)', + 'MemberExpr': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' lvalue ->(?P\w+) (?P[0-9a-fx]+)", + 'ParenExpr': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*?)\'', 'ParmVarDecl': r'^ (?P
[0-9a-fx]+) <(?P.*)> (?P.+) \'(?P.*?)\'(?P:\'.*?\')?', - 'FormatAttr': r'^ (?P
[0-9a-fx]+) <(?P.*)>(?P Implicit)? (?P\w+) (?P\d+) (?P\d+)', - 'RecordType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', - 'Record': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', 'PointerType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', - 'Typedef': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', - 'ConstantArrayType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\' (?P\d+)', + 'Record': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', 'RecordDecl': r'^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.+)', - 'FieldDecl': r'^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.+)', - 'ElaboratedType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\' (?P.+)', + 'RecordType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', + 'ReturnStmt': r'^ (?P
[0-9a-fx]+) <(?P.*)>', + 'StringLiteral': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*)\'(?P lvalue)? (?P.*)', + 'TranslationUnitDecl': r'^ (?P
[0-9a-fx]+)', + 'Typedef': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', + 'TypedefDecl': r'^ (?P
[0-9a-fx]+) <(?P.+?)> (?P|[^ ]+)(?P.*?) (?P\w+) \'(?P.*?)\'(?P:\'.*?\')?', 'TypedefType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\' (?P.+)', - 'VarDecl': r"^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.+) '(?P.+)'(?P.*)", - 'DeprecatedAttr': r"^ (?P
[0-9a-fx]+) <(?P.*)> \"(?P.*?)\" \"(?P.*?)\"", - 'BinaryOperator': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' '(?P.*?)'", - 'MemberExpr': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' lvalue ->(?P\w+) (?P[0-9a-fx]+)", - 'CStyleCastExpr': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' <(?P.*)>", - 'CharacterLiteral': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' (?P\d+)", 'UnaryOperator': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)'(?P lvalue)?(?P prefix)?(?P postfix)? '(?P.*?)'", - 'DeclStmt': r"^ (?P
[0-9a-fx]+) <(?P.*)>", - 'ForStmt': r"^ (?P
[0-9a-fx]+) <(?P.*)>", - 'BreakStmt': r"^ (?P
[0-9a-fx]+) <(?P.*)>", + 'VarDecl': r"^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.+) '(?P.+)'(?P.*)", } def build_tree(nodes, depth): @@ -108,16 +108,20 @@ def convert_lines_to_nodes(lines): print("Can not understand line '%s'" % line) sys.exit(1) + node_type = indent_and_type.group(2) + # if node_type == 'DeclRefExpr': + # print(line[offset:]) + offset = len(indent_and_type.group(0)) try: - result = re.search(regex[indent_and_type.group(2)], line[offset:]) + result = re.search(regex[node_type], line[offset:]) except KeyError: - print("There is no regex for '%s'." % indent_and_type.group(2)) + print("There is no regex for '%s'." % node_type) print("I will print out all the lines so a regex can be created:\n") for line in lines: s = re.search(r'^([|\- `]*)(\w+)', line) - if s is not None and indent_and_type.group(2) == s.group(2): + if s is not None and node_type == s.group(2): print(line[offset:]) sys.exit(1) @@ -128,7 +132,7 @@ def convert_lines_to_nodes(lines): node = result.groupdict() - node['node'] = indent_and_type.group(2) + node['node'] = node_type indent_level = len(indent_and_type.group(1)) / 2 nodes.append([indent_level, node]) diff --git a/c2go.py b/c2go.py index a2180c452..7c148e494 100644 --- a/c2go.py +++ b/c2go.py @@ -34,7 +34,8 @@ def is_identifier(w): def resolve_type(s): s = s.strip() - if s == 'const char *' or s == 'const char*' or s == 'char *' or s == 'const char *restrict': + if s == 'const char *' or s == 'const char*' or s == 'char *' or \ + s == 'const char *restrict' or s == 'const char *__restrict': return 'string' if s == 'float': @@ -100,7 +101,7 @@ def resolve_type(s): if '(*)' in s or s == '__sFILEX *' or s == 'fpos_t': return "interface{}" - return s + # return s raise Exception('Cannot resolve type "%s"' % s) @@ -151,8 +152,6 @@ def render_expression(node): return '// CONDITIONAL_OPERATOR: %s' % ''.join([t.spelling for t in node.get_tokens()]), 'unknown' if node['node'] == 'UNARY_OPERATOR': - # print(children[2].kind.name) - expr_start = 
list(node.get_children())[0].extent.start.column operator = None for t in node.get_tokens(): @@ -203,8 +202,8 @@ def render_expression(node): return name, e[1] - if node['node'] in ('CHARACTER_LITERAL', 'STRING_LITERAL', 'FLOATING_LITERAL'): - return list(node.get_tokens())[0].spelling, 'const char*' + if node['node'] in ('CHARACTER_LITERAL', 'StringLiteral', 'FLOATING_LITERAL'): + return node['value'], 'const char*' if node['node'] == 'INTEGER_LITERAL': literal = list(node.get_tokens())[0].spelling @@ -218,7 +217,7 @@ def render_expression(node): return '(%s)' % e[0], e[1] if node['node'] == 'DeclRefExpr': - return node['unknown'], node['type'] + return node['name'], node['type'] if node['node'] == 'ImplicitCastExpr': return render_expression(node['children'][0]) @@ -283,9 +282,9 @@ def render_expression(node): if node['node'] == 'PARM_DECL': return resolve_type(node.type.spelling), 'unknown' - return node['node'], 'unknown' + # return node['node'], 'unknown' - #raise Exception('render_expression: %s' % node.kind) + raise Exception('render_expression: %s' % node['node']) def print_children(node): print(len(list(node.get_children())), [t.spelling for t in node.get_tokens()]) @@ -315,8 +314,6 @@ def render(out, node, indent=0, return_type=None): for c in node['children']: if c['node'] == 'CompoundStmt': has_body = True - # print(function_name) - # print(json.dumps(node['children'])) args = [] # for a in get_function_params(node): @@ -339,7 +336,7 @@ def render(out, node, indent=0, return_type=None): print_line(out, '}\n', indent) - # function_defs[node.spelling] = (node.result_type.spelling, [a.type.spelling for a in node.get_arguments()]) + function_defs[node['name']] = (node['type'], [a['type'] for a in get_function_params(node)]) return From a33b4eb146bf74f3fb0810f0eb52e3a76832320a Mon Sep 17 00:00:00 2001 From: Elliot Chance Date: Sat, 25 Mar 2017 12:07:56 +1100 Subject: [PATCH 4/5] argv.c array.c comments.c fib.c now working --- ast2json.py | 9 ++-- c2go.py | 
118 ++++++++++++++++++++++++++-------------------------- 2 files changed, 66 insertions(+), 61 deletions(-) diff --git a/ast2json.py b/ast2json.py index bebaf8add..b7e6f23c3 100644 --- a/ast2json.py +++ b/ast2json.py @@ -23,6 +23,7 @@ regex = { 'AlwaysInlineAttr': r"^ (?P
[0-9a-fx]+) <(?P.*)> always_inline", + 'ArraySubscriptExpr': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' (?P.*)", 'AsmLabelAttr': r"^ (?P
[0-9a-fx]+) <(?P.*)> \"(?P.+)\"", 'AvailabilityAttr': r"^ (?P
[0-9a-fx]+) <(?P.*)> (?P\w+) (?P[\d.]+) (?P[\d.]+) (?P[\d.]+) (?P\".*?\"|\w+) (?P\".*?\"|\w+)", 'BinaryOperator': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' '(?P.*?)'", @@ -38,15 +39,16 @@ 'DeprecatedAttr': r"^ (?P
[0-9a-fx]+) <(?P.*)> \"(?P.*?)\" \"(?P.*?)\"", 'ElaboratedType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\' (?P.+)', 'FieldDecl': r'^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.+)', + 'FloatingLiteral': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*)' (?P.+)", 'FormatAttr': r'^ (?P
[0-9a-fx]+) <(?P.*)>(?P Implicit)? (?P\w+) (?P\d+) (?P\d+)', 'ForStmt': r"^ (?P
[0-9a-fx]+) <(?P.*)>", 'FunctionDecl': r"^ (?P
[0-9a-fx]+) (?Pprev [0-9a-fx]+)? ?<(?P.*)> (?P[^ ]+)(?P implicit)?(?P used)? (?P\w+) '(?P.*)'(?P extern)?", 'IfStmt': r'^ (?P
[0-9a-fx]+) <(?P.*)>', 'ImplicitCastExpr': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*)\' <(?P.*)>', - 'IntegerLiteral': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*)\' (?P\d+)', - 'MemberExpr': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' lvalue ->(?P\w+) (?P[0-9a-fx]+)", + 'IntegerLiteral': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*)\' (?P.+)', + 'MemberExpr': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' (?P.*?)(?P\w+) (?P[0-9a-fx]+)", 'ParenExpr': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*?)\'', - 'ParmVarDecl': r'^ (?P
[0-9a-fx]+) <(?P.*)> (?P.+) \'(?P.*?)\'(?P:\'.*?\')?', + 'ParmVarDecl': r"^ (?P
[0-9a-fx]+) <(?P.*)> (?P.+?)(?P \w+)? '(?P.*?)'(?P:'.*?')?", 'PointerType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', 'Record': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', 'RecordDecl': r'^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.+)', @@ -59,6 +61,7 @@ 'TypedefType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\' (?P.+)', 'UnaryOperator': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)'(?P lvalue)?(?P prefix)?(?P postfix)? '(?P.*?)'", 'VarDecl': r"^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.+) '(?P.+)'(?P.*)", + 'WhileStmt': r"^ (?P
[0-9a-fx]+) <(?P.*)>", } def build_tree(nodes, depth): diff --git a/c2go.py b/c2go.py index 7c148e494..5cb35f1ea 100644 --- a/c2go.py +++ b/c2go.py @@ -56,7 +56,7 @@ def resolve_type(s): if s == 'int' or s == '__darwin_ct_rune_t': return s - if s == 'long': + if s == 'long' or s == '__mbstate_t' or s == '__builtin_va_list': return 'int64' if s == 'long long': @@ -86,6 +86,9 @@ def resolve_type(s): if s == 'long int': return 'int32' + if s == '__int128': + return 'int64' + if re.match('unsigned char \\[\\d+\\]', s): return s[14:] + 'byte' @@ -101,9 +104,9 @@ def resolve_type(s): if '(*)' in s or s == '__sFILEX *' or s == 'fpos_t': return "interface{}" - # return s + return s - raise Exception('Cannot resolve type "%s"' % s) + # raise Exception('Cannot resolve type "%s"' % s) def cast(expr, from_type, to_type): from_type = resolve_type(from_type) @@ -128,15 +131,9 @@ def print_line(out, line, indent): out.write('%s%s\n' % ('\t' * indent, line)) def render_expression(node): - if node['node'] == 'BINARY_OPERATOR': - end_of_left = list(node.get_children())[0].extent.end.column - operator = None - for t in node.get_tokens(): - if t.extent.start.column >= end_of_left: - operator = t.spelling - break - - left, right = [render_expression(t)[0] for t in list(node.get_children())] + if node['node'] == 'BinaryOperator': + operator = node['operator'] + left, right = [render_expression(t)[0] for t in node['children']] return_type = 'bool' if operator == '|' or operator == '&': @@ -151,19 +148,9 @@ def render_expression(node): except TypeError: return '// CONDITIONAL_OPERATOR: %s' % ''.join([t.spelling for t in node.get_tokens()]), 'unknown' - if node['node'] == 'UNARY_OPERATOR': - expr_start = list(node.get_children())[0].extent.start.column - operator = None - for t in node.get_tokens(): - if t.extent.start.column >= expr_start: - break - - operator = t.spelling - - if operator is None: - operator = '++' - - expr = render_expression(list(node.get_children())[0]) + if 
node['node'] == 'UnaryOperator': + operator = node['operator'] + expr = render_expression(node['children'][0]) if operator == '!': return '%s(%s)' % ('__not_%s' % expr[1], expr[0]), expr[1] @@ -193,20 +180,20 @@ def render_expression(node): e = render_expression(children[0]) name = e[0] - if name == 'argc': - name = 'len(os.Args)' - add_import("os") - elif name == 'argv': - name = 'os.Args' - add_import("os") + # if name == 'argc': + # name = 'len(os.Args)' + # add_import("os") + # elif name == 'argv': + # name = 'os.Args' + # add_import("os") return name, e[1] if node['node'] in ('CHARACTER_LITERAL', 'StringLiteral', 'FLOATING_LITERAL'): return node['value'], 'const char*' - if node['node'] == 'INTEGER_LITERAL': - literal = list(node.get_tokens())[0].spelling + if node['node'] == 'IntegerLiteral': + literal = node['value'] if literal[-1] == 'L': literal = '%s(%s)' % (resolve_type('long'), literal[:-1]) @@ -217,7 +204,16 @@ def render_expression(node): return '(%s)' % e[0], e[1] if node['node'] == 'DeclRefExpr': - return node['name'], node['type'] + name = node['name'] + + if name == 'argc': + name = 'len(os.Args)' + add_import("os") + elif name == 'argv': + name = 'os.Args' + add_import("os") + + return name, node['type'] if node['node'] == 'ImplicitCastExpr': return render_expression(node['children'][0]) @@ -247,8 +243,8 @@ def render_expression(node): return '%s(%s)' % (func_name, ', '.join(args)), func_def[0] - if node['node'] == 'ARRAY_SUBSCRIPT_EXPR': - children = list(node.get_children()) + if node['node'] == 'ArraySubscriptExpr': + children = node['children'] return '%s[%s]' % (render_expression(children[0])[0], render_expression(children[1])[0]), 'unknown' @@ -260,22 +256,22 @@ def render_expression(node): children = list(node.get_children()) return render_expression(children[0]), 'unknown' - if node['node'] == 'FIELD_DECL' or node['node'] == 'VAR_DECL': - type = resolve_type(node.type.spelling) - name = node.spelling + if node['node'] == 'FIELD_DECL' or 
node['node'] == 'VarDecl': + type = resolve_type(node['type']) + name = node['name'].replace('used', '') prefix = '' - if node['node'] == 'VAR_DECL': + if node['node'] == 'VarDecl': prefix = 'var ' suffix = '' - children = list(node.get_children()) + # children = node['children'] # We must check the position of the child is at the end. Otherwise a # child can refer to another expression like the size of the data type. - if len(children) > 0 and children[0].extent.end.column == node.extent.end.column: - e = render_expression(children[0]) - suffix = ' = %s' % cast(e[0], e[1], type) + # if len(children) > 0 and children[0].extent.end.column == node.extent.end.column: + # e = render_expression(children[0]) + # suffix = ' = %s' % cast(e[0], e[1], type) return '%s%s %s%s' % (prefix, name, type, suffix), 'unknown' @@ -316,8 +312,8 @@ def render(out, node, indent=0, return_type=None): has_body = True args = [] - # for a in get_function_params(node): - # args.append('%s %s' % (a['name'], resolve_type(a['type']))) + for a in get_function_params(node): + args.append('%s %s' % (a['name'], resolve_type(a['type']))) if has_body: return_type = ' ' + node['type'] @@ -377,17 +373,17 @@ def render(out, node, indent=0, return_type=None): # return - # if node['node'] == 'FOR_STMT': - # children = list(node.get_children()) + if node['node'] == 'ForStmt': + children = node['children'] - # a, b, c = [render_expression(e)[0] for e in children[:3]] - # print_line(out, 'for %s; %s; %s {' % (a, b, c), indent) + a, b, c = [render_expression(e)[0] for e in children[:3]] + print_line(out, 'for %s; %s; %s {' % (a, b, c), indent) - # render(out, children[3], indent + 1, return_type) + render(out, children[3], indent + 1, return_type) - # print_line(out, '}', indent) + print_line(out, '}', indent) - # return + return # if node['node'] == 'BREAK_STMT': # print_line(out, 'break', indent) @@ -419,7 +415,13 @@ def render(out, node, indent=0, return_type=None): return if node['node'] == 'TypedefDecl': 
- print_line(out, "type %s %s\n" % (node['type'], node['name']), indent) + # FIXME: All of the logic here is just to avoid errors, it needs to be + # fixed up. + if 'struct' in node['type'] or 'union' in node['type']: + return + node['type'] = node['type'].replace('unsigned', '') + + print_line(out, "type %s %s\n" % (node['name'], resolve_type(node['type'])), indent) # print(node) return @@ -472,10 +474,10 @@ def render(out, node, indent=0, return_type=None): # print_line(out, '// ' + ' '.join(tokens[1:-2]), indent) # return - # if node['node'] == 'DECL_STMT': - # for child in node.get_children(): - # print_line(out, render_expression(child)[0], indent) - # return + if node['node'] == 'DeclStmt': + for child in node['children']: + print_line(out, render_expression(child)[0], indent) + return if node['node'] == 'VarDecl': # tokens = [t.spelling for t in node.get_tokens()] From d0415975810f774beb4b14e4caefc02d717c485e Mon Sep 17 00:00:00 2001 From: Elliot Chance Date: Sat, 25 Mar 2017 13:51:45 +1100 Subject: [PATCH 5/5] All tests now parsing --- ast2json.py | 10 ++--- c2go.py | 111 +++++++++++++++++++++------------------------------- 2 files changed, 50 insertions(+), 71 deletions(-) diff --git a/ast2json.py b/ast2json.py index b7e6f23c3..3f8827153 100644 --- a/ast2json.py +++ b/ast2json.py @@ -34,11 +34,11 @@ 'CompoundStmt': r'^ (?P
[0-9a-fx]+) <(?P.*)>', 'ConstantArrayType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\' (?P\d+)', 'CStyleCastExpr': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' <(?P.*)>", - 'DeclRefExpr': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' (lvalue (?P\w+)|Function) (?P[0-9a-fx]+) '(?P.*?)' '(?P.*?)'", + 'DeclRefExpr': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)'.*? (lvalue (?P\w+)|Function) (?P[0-9a-fx]+) '(?P.*?)' '(?P.*?)'", 'DeclStmt': r"^ (?P
[0-9a-fx]+) <(?P.*)>", 'DeprecatedAttr': r"^ (?P
[0-9a-fx]+) <(?P.*)> \"(?P.*?)\" \"(?P.*?)\"", 'ElaboratedType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\' (?P.+)', - 'FieldDecl': r'^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.+)', + 'FieldDecl': r"^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.*?)(?P\w+?) '(?P.+?)'", 'FloatingLiteral': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*)' (?P.+)", 'FormatAttr': r'^ (?P
[0-9a-fx]+) <(?P.*)>(?P Implicit)? (?P\w+) (?P\d+) (?P\d+)', 'ForStmt': r"^ (?P
[0-9a-fx]+) <(?P.*)>", @@ -51,7 +51,7 @@ 'ParmVarDecl': r"^ (?P
[0-9a-fx]+) <(?P.*)> (?P.+?)(?P \w+)? '(?P.*?)'(?P:'.*?')?", 'PointerType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', 'Record': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', - 'RecordDecl': r'^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.+)', + 'RecordDecl': r"^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?Pstruct|union) (?P\w+)", 'RecordType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', 'ReturnStmt': r'^ (?P
[0-9a-fx]+) <(?P.*)>', 'StringLiteral': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*)\'(?P lvalue)? (?P.*)', @@ -60,7 +60,7 @@ 'TypedefDecl': r'^ (?P
[0-9a-fx]+) <(?P.+?)> (?P|[^ ]+)(?P.*?) (?P\w+) \'(?P.*?)\'(?P:\'.*?\')?', 'TypedefType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\' (?P.+)', 'UnaryOperator': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)'(?P lvalue)?(?P prefix)?(?P postfix)? '(?P.*?)'", - 'VarDecl': r"^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.+) '(?P.+)'(?P.*)", + 'VarDecl': r"^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.+) '(?P.+?)'.*?(?P.*)", 'WhileStmt': r"^ (?P
[0-9a-fx]+) <(?P.*)>", } @@ -112,7 +112,7 @@ def convert_lines_to_nodes(lines): sys.exit(1) node_type = indent_and_type.group(2) - # if node_type == 'DeclRefExpr': + # if node_type == 'FieldDecl': # print(line[offset:]) offset = len(indent_and_type.group(0)) diff --git a/c2go.py b/c2go.py index 5cb35f1ea..0837953d7 100644 --- a/c2go.py +++ b/c2go.py @@ -41,7 +41,7 @@ def resolve_type(s): if s == 'float': return 'float32' - if s == 'void *': + if s == 'void *' or s == '__darwin_pthread_handler_rec *': return 'interface{}' if s == 'char': @@ -104,6 +104,9 @@ def resolve_type(s): if '(*)' in s or s == '__sFILEX *' or s == 'fpos_t': return "interface{}" + if '(' in s: + return 'interface{}' + return s # raise Exception('Cannot resolve type "%s"' % s) @@ -169,27 +172,7 @@ def render_expression(node): return '%s%s' % (operator, expr[0]), expr[1] - if node['node'] == 'UNEXPOSED_EXPR': - children = list(node.get_children()) - if len(children) < 1: - return '// UNEXPOSED_EXPR: %s' % ''.join([t.spelling for t in node.get_tokens()]), 'unknown' - - # if len(children) > 1: - # raise Exception('To many children!') - - e = render_expression(children[0]) - name = e[0] - - # if name == 'argc': - # name = 'len(os.Args)' - # add_import("os") - # elif name == 'argv': - # name = 'os.Args' - # add_import("os") - - return name, e[1] - - if node['node'] in ('CHARACTER_LITERAL', 'StringLiteral', 'FLOATING_LITERAL'): + if node['node'] in ('CHARACTER_LITERAL', 'StringLiteral', 'FloatingLiteral'): return node['value'], 'const char*' if node['node'] == 'IntegerLiteral': @@ -248,15 +231,15 @@ def render_expression(node): return '%s[%s]' % (render_expression(children[0])[0], render_expression(children[1])[0]), 'unknown' - if node['node'] == 'MEMBER_REF_EXPR': - children = list(node.get_children()) - return '%s.%s' % (render_expression(children[0])[0], list(node.get_tokens())[-2].spelling), 'unknown' + if node['node'] == 'MemberExpr': + children = node['children'] + return '%s.%s' % 
(render_expression(children[0])[0], node['name']), children[0]['type'] if node['node'] == 'CSTYLE_CAST_EXPR': children = list(node.get_children()) return render_expression(children[0]), 'unknown' - if node['node'] == 'FIELD_DECL' or node['node'] == 'VarDecl': + if node['node'] == 'FieldDecl' or node['node'] == 'VarDecl': type = resolve_type(node['type']) name = node['name'].replace('used', '') @@ -265,13 +248,9 @@ def render_expression(node): prefix = 'var ' suffix = '' - # children = node['children'] - - # We must check the position of the child is at the end. Otherwise a - # child can refer to another expression like the size of the data type. - # if len(children) > 0 and children[0].extent.end.column == node.extent.end.column: - # e = render_expression(children[0]) - # suffix = ' = %s' % cast(e[0], e[1], type) + if 'children' in node: + children = node['children'] + suffix = ' = %s' % render_expression(children[0])[0] return '%s%s %s%s' % (prefix, name, type, suffix), 'unknown' @@ -317,7 +296,7 @@ def render(out, node, indent=0, return_type=None): if has_body: return_type = ' ' + node['type'] - if return_type == ' void': + if return_type == ' void ()': return_type = '' if function_name == 'main': @@ -345,33 +324,33 @@ def render(out, node, indent=0, return_type=None): render(out, c, indent, return_type) return - # if node['node'] == 'IF_STMT': - # children = list(node.get_children()) + if node['node'] == 'IfStmt': + children = node['children'] - # e = render_expression(children[0]) - # print_line(out, 'if %s {' % cast(e[0], e[1], 'bool'), indent) + e = render_expression(children[0]) + print_line(out, 'if %s {' % cast(e[0], e[1], 'bool'), indent) - # render(out, children[1], indent + 1, return_type) + render(out, children[1], indent + 1, return_type) - # if len(children) > 2: - # print_line(out, '} else {', indent) - # render(out, children[2], indent + 1, return_type) + if len(children) > 2: + print_line(out, '} else {', indent) + render(out, children[2], indent 
+ 1, return_type) - # print_line(out, '}', indent) + print_line(out, '}', indent) - # return + return - # if node['node'] == 'WHILE_STMT': - # children = list(node.get_children()) + if node['node'] == 'WhileStmt': + children = node['children'] - # e = render_expression(children[0]) - # print_line(out, 'for %s {' % cast(e[0], e[1], 'bool'), indent) + e = render_expression(children[0]) + print_line(out, 'for %s {' % cast(e[0], e[1], 'bool'), indent) - # render(out, children[1], indent + 1, return_type) + render(out, children[1], indent + 1, return_type) - # print_line(out, '}', indent) + print_line(out, '}', indent) - # return + return if node['node'] == 'ForStmt': children = node['children'] @@ -385,21 +364,13 @@ def render(out, node, indent=0, return_type=None): return - # if node['node'] == 'BREAK_STMT': - # print_line(out, 'break', indent) - # return - - # if node['node'] == 'UNARY_OPERATOR': - # variable, operator = [t.spelling for t in list(node.get_tokens())[0:2]] - # if operator == '++': - # print_line(out, '%s += 1' % variable, indent) - # #print_line(out, '%s = string(%s[1:])' % (variable, variable), indent) - # return - - # print_line(out, '%s%s' % (operator, variable), indent) - # return + if node['node'] == 'BreakStmt': + print_line(out, 'break', indent) + return - # #raise Exception('UNARY_OPERATOR: %s' % operator) + if node['node'] == 'UnaryOperator': + print_line(out, render_expression(node)[0], indent) + return if node['node'] == 'ReturnStmt': # try: @@ -410,7 +381,7 @@ def render(out, node, indent=0, return_type=None): return - if node['node'] in ('BINARY_OPERATOR', 'INTEGER_LITERAL', 'CallExpr'): + if node['node'] in ('BinaryOperator', 'INTEGER_LITERAL', 'CallExpr'): print_line(out, render_expression(node)[0], indent) return @@ -434,6 +405,14 @@ def render(out, node, indent=0, return_type=None): return if node['node'] == 'RecordDecl': + if node['kind'] == 'union': + return + + print_line(out, "type %s %s {" % (node['name'], node['kind']), indent) + 
if 'children' in node: + for c in node['children']: + print_line(out, render_expression(c)[0], indent + 1) + print_line(out, "}\n", indent) return #if node['node'] == 'UNION_DECL' or node['node'] == 'STRUCT_DECL':