diff --git a/.gitignore b/.gitignore index 99284e498..835db24c1 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ /pp.c /out.go /a.out +/pp.json diff --git a/ast2json.py b/ast2json.py new file mode 100644 index 000000000..3f8827153 --- /dev/null +++ b/ast2json.py @@ -0,0 +1,149 @@ +import sys +import re +import json + +# This script converts the output of clang AST into a JSON file. +# +# Usage: +# clang -Xclang -ast-dump -fsyntax-only myfile.c | python ast2json.py +# +# Yes, there are many better ways to do this. However I chose this method +# because: +# +# 1. I need to separate the clang AST from the c2go conversion process so that +# the c2go program can ingest a reliable JSON file and not depend on clang or +# its different versions at all. +# 2. The clang API is not stable and trying to match up binaries with different +# versions and operating systems can be tricky and brittle. +# 3. This tool, in time, will become a better binary of some kind that produces +# much the same JSON output (so minimal changes to c2go.py). +# 4. I needed something quick and dirty to proof the complete toolchain and get +# it working on different versions of clang and different operating systems +# before we enough information to really standardise the process. + +regex = { + 'AlwaysInlineAttr': r"^ (?P
[0-9a-fx]+) <(?P.*)> always_inline", + 'ArraySubscriptExpr': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' (?P.*)", + 'AsmLabelAttr': r"^ (?P
[0-9a-fx]+) <(?P.*)> \"(?P.+)\"", + 'AvailabilityAttr': r"^ (?P
[0-9a-fx]+) <(?P.*)> (?P\w+) (?P[\d.]+) (?P[\d.]+) (?P[\d.]+) (?P\".*?\"|\w+) (?P\".*?\"|\w+)", + 'BinaryOperator': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' '(?P.*?)'", + 'BreakStmt': r"^ (?P
[0-9a-fx]+) <(?P.*)>", + 'BuiltinType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', + 'CallExpr': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*?)\'', + 'CharacterLiteral': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' (?P\d+)", + 'CompoundStmt': r'^ (?P
[0-9a-fx]+) <(?P.*)>', + 'ConstantArrayType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\' (?P\d+)', + 'CStyleCastExpr': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' <(?P.*)>", + 'DeclRefExpr': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)'.*? (lvalue (?P\w+)|Function) (?P[0-9a-fx]+) '(?P.*?)' '(?P.*?)'", + 'DeclStmt': r"^ (?P
[0-9a-fx]+) <(?P.*)>", + 'DeprecatedAttr': r"^ (?P
[0-9a-fx]+) <(?P.*)> \"(?P.*?)\" \"(?P.*?)\"", + 'ElaboratedType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\' (?P.+)', + 'FieldDecl': r"^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.*?)(?P\w+?) '(?P.+?)'", + 'FloatingLiteral': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*)' (?P.+)", + 'FormatAttr': r'^ (?P
[0-9a-fx]+) <(?P.*)>(?P Implicit)? (?P\w+) (?P\d+) (?P\d+)', + 'ForStmt': r"^ (?P
[0-9a-fx]+) <(?P.*)>", + 'FunctionDecl': r"^ (?P
[0-9a-fx]+) (?Pprev [0-9a-fx]+)? ?<(?P.*)> (?P[^ ]+)(?P implicit)?(?P used)? (?P\w+) '(?P.*)'(?P extern)?", + 'IfStmt': r'^ (?P
[0-9a-fx]+) <(?P.*)>', + 'ImplicitCastExpr': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*)\' <(?P.*)>', + 'IntegerLiteral': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*)\' (?P.+)', + 'MemberExpr': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)' (?P.*?)(?P\w+) (?P[0-9a-fx]+)", + 'ParenExpr': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*?)\'', + 'ParmVarDecl': r"^ (?P
[0-9a-fx]+) <(?P.*)> (?P.+?)(?P \w+)? '(?P.*?)'(?P:'.*?')?", + 'PointerType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', + 'Record': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', + 'RecordDecl': r"^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?Pstruct|union) (?P\w+)", + 'RecordType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', + 'ReturnStmt': r'^ (?P
[0-9a-fx]+) <(?P.*)>', + 'StringLiteral': r'^ (?P
[0-9a-fx]+) <(?P.*)> \'(?P.*)\'(?P lvalue)? (?P.*)', + 'TranslationUnitDecl': r'^ (?P
[0-9a-fx]+)', + 'Typedef': r'^ (?P
[0-9a-fx]+) \'(?P.*)\'', + 'TypedefDecl': r'^ (?P
[0-9a-fx]+) <(?P.+?)> (?P|[^ ]+)(?P.*?) (?P\w+) \'(?P.*?)\'(?P:\'.*?\')?', + 'TypedefType': r'^ (?P
[0-9a-fx]+) \'(?P.*)\' (?P.+)', + 'UnaryOperator': r"^ (?P
[0-9a-fx]+) <(?P.*)> '(?P.*?)'(?P lvalue)?(?P prefix)?(?P postfix)? '(?P.*?)'", + 'VarDecl': r"^ (?P
[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.+) '(?P.+?)'.*?(?P.*)", + 'WhileStmt': r"^ (?P
[0-9a-fx]+) <(?P.*)>", +} + +def build_tree(nodes, depth): + """Convert an array of nodes, each prefixed with a depth into a tree.""" + if len(nodes) == 0: + return [] + + # Split the list into sections, treat each section as a a tree with its own + # root. + sections = [] + for node in nodes: + if node[0] == depth: + sections.append([node]) + else: + sections[-1].append(node) + + results = [] + for section in sections: + children = build_tree([n for n in section if n[0] > depth], depth + 1) + result = section[0][1] + + if len(children) > 0: + result['children'] = children + + results.append(result) + + return results + +def read_ast(): + stdin = sys.stdin.read() + uncolored = re.sub(r'\x1b\[[\d;]+m', '', stdin) + return uncolored.split("\n") + +def convert_lines_to_nodes(lines): + nodes = [] + for line in lines: + if line.strip() == '': + continue + + # This will need to be handled more gracefully... I'm not even sure + # what this means? + if '<<>>' in line: + continue + + indent_and_type = re.search(r'^([|\- `]*)(\w+)', line) + if indent_and_type is None: + print("Can not understand line '%s'" % line) + sys.exit(1) + + node_type = indent_and_type.group(2) + # if node_type == 'FieldDecl': + # print(line[offset:]) + + offset = len(indent_and_type.group(0)) + try: + result = re.search(regex[node_type], line[offset:]) + except KeyError: + print("There is no regex for '%s'." % node_type) + print("I will print out all the lines so a regex can be created:\n") + + for line in lines: + s = re.search(r'^([|\- `]*)(\w+)', line) + if s is not None and node_type == s.group(2): + print(line[offset:]) + + sys.exit(1) + + if result is None: + print("Can not understand line '%s'" % line) + sys.exit(1) + + node = result.groupdict() + + node['node'] = node_type + + indent_level = len(indent_and_type.group(1)) / 2 + nodes.append([indent_level, node]) + + return nodes + +lines = read_ast() +nodes = convert_lines_to_nodes(lines) +tree = build_tree(nodes, 0) + +print(json.dumps(tree, sort_keys=True, indent=2, separators=(',', ': '))) diff --git a/c2go.py b/c2go.py index 614c00f9a..159e0de77 100644 --- a/c2go.py +++ b/c2go.py @@ -2,10 +2,11 @@ # -*- coding: utf-8 -*- import sys -import clang.cindex import pprint import re import subprocess +import json + try: import StringIO as io except ImportError: @@ -40,13 +41,14 @@ def is_identifier(w): def resolve_type(s): s = s.strip() - if s == 'const char *' or s == 'const char*' or s == 'char *' or s == 'const char *restrict': + if s == 'const char *' or s == 'const char*' or s == 'char *' or \ + s == 'const char *restrict' or s == 'const char *__restrict': return 'string' if s == 'float': return 'float32' - if s == 'void *': + if s == 'void *' or s == '__darwin_pthread_handler_rec *': return 'interface{}' if s == 'char': @@ -61,7 +63,7 @@ def resolve_type(s): if s == 'int' or s == '__darwin_ct_rune_t': return s - if s == 'long': + if s == 'long' or s == '__mbstate_t' or s == '__builtin_va_list': return 'int64' if s == 'long long': @@ -91,6 +93,9 @@ def resolve_type(s): if s == 'long int': return 'int32' + if s == '__int128': + return 'int64' + if re.match('unsigned char \\[\\d+\\]', s): return s[14:] + 'byte' @@ -106,9 +111,12 @@ def resolve_type(s): if '(*)' in s or s == '__sFILEX *' or s == 'fpos_t': return "interface{}" + if '(' in s: + return 'interface{}' + return s - raise Exception('Cannot resolve type "%s"' % s) + # raise Exception('Cannot resolve type "%s"' % s) def cast(expr, from_type, to_type): from_type = resolve_type(from_type) @@ -133,15 +141,9 @@ def print_line(out, line, indent): out.write('%s%s\n' % ('\t' * indent, line)) def render_expression(node): - if node.kind.name == 'BINARY_OPERATOR': - end_of_left = list(node.get_children())[0].extent.end.column - operator = None - for t in node.get_tokens(): - if t.extent.start.column >= end_of_left: - operator = t.spelling - break - - left, right = [render_expression(t)[0] for t in list(node.get_children())] + if node['node'] == 'BinaryOperator': + operator = node['operator'] + left, right = [render_expression(t)[0] for t in node['children']] return_type = 'bool' if operator == '|' or operator == '&': @@ -149,28 +151,16 @@ def render_expression(node): return '%s %s %s' % (left, operator, right), return_type - if node.kind.name == 'CONDITIONAL_OPERATOR': + if node['node'] == 'CONDITIONAL_OPERATOR': a, b, c = [render_expression(t) for t in list(node.get_children())] try: return '__ternary(%s, %s, %s)' % (cast(a[0], 'bool'), b[0], c[0]), b[1] except TypeError: return '// CONDITIONAL_OPERATOR: %s' % ''.join([t.spelling for t in node.get_tokens()]), 'unknown' - if node.kind.name == 'UNARY_OPERATOR': - # print(children[2].kind.name) - - expr_start = list(node.get_children())[0].extent.start.column - operator = None - for t in node.get_tokens(): - if t.extent.start.column >= expr_start: - break - - operator = t.spelling - - if operator is None: - operator = '++' - - expr = render_expression(list(node.get_children())[0]) + if node['node'] == 'UnaryOperator': + operator = node['operator'] + expr = render_expression(node['children'][0]) if operator == '!': return '%s(%s)' % ('__not_%s' % expr[1], expr[0]), expr[1] @@ -189,16 +179,22 @@ def render_expression(node): return '%s%s' % (operator, expr[0]), expr[1] - if node.kind.name == 'UNEXPOSED_EXPR': - children = list(node.get_children()) - if len(children) < 1: - return '// UNEXPOSED_EXPR: %s' % ''.join([t.spelling for t in node.get_tokens()]), 'unknown' + if node['node'] in ('CHARACTER_LITERAL', 'StringLiteral', 'FloatingLiteral'): + return node['value'], 'const char*' - # if len(children) > 1: - # raise Exception('To many children!') + if node['node'] == 'IntegerLiteral': + literal = node['value'] + if literal[-1] == 'L': + literal = '%s(%s)' % (resolve_type('long'), literal[:-1]) - e = render_expression(children[0]) - name = e[0] + return literal, 'int' + + if node['node'] == 'PAREN_EXPR': + e = render_expression(list(node.get_children())[0]) + return '(%s)' % e[0], e[1] + + if node['node'] == 'DeclRefExpr': + name = node['name'] if name == 'argc': name = 'len(os.Args)' @@ -207,27 +203,13 @@ def render_expression(node): name = 'os.Args' add_import("os") - return name, e[1] - - if node.kind.name in ('CHARACTER_LITERAL', 'STRING_LITERAL', 'FLOATING_LITERAL'): - return list(node.get_tokens())[0].spelling, 'const char*' + return name, node['type'] - if node.kind.name == 'INTEGER_LITERAL': - literal = list(node.get_tokens())[0].spelling - if literal[-1] == 'L': - literal = '%s(%s)' % (resolve_type('long'), literal[:-1]) - - return literal, 'int' + if node['node'] == 'ImplicitCastExpr': + return render_expression(node['children'][0]) - if node.kind.name == 'PAREN_EXPR': - e = render_expression(list(node.get_children())[0]) - return '(%s)' % e[0], e[1] - - if node.kind.name == 'DECL_REF_EXPR': - return node.spelling, node.type.spelling - - if node.kind.name == 'CALL_EXPR': - children = list(node.get_children()) + if node['node'] == 'CallExpr': + children = node['children'] func_name = render_expression(children[0])[0] func_def = function_defs[func_name] @@ -251,74 +233,77 @@ def render_expression(node): return '%s(%s)' % (func_name, ', '.join(args)), func_def[0] - if node.kind.name == 'ARRAY_SUBSCRIPT_EXPR': - children = list(node.get_children()) + if node['node'] == 'ArraySubscriptExpr': + children = node['children'] return '%s[%s]' % (render_expression(children[0])[0], render_expression(children[1])[0]), 'unknown' - if node.kind.name == 'MEMBER_REF_EXPR': - children = list(node.get_children()) - return '%s.%s' % (render_expression(children[0])[0], list(node.get_tokens())[-2].spelling), 'unknown' + if node['node'] == 'MemberExpr': + children = node['children'] + return '%s.%s' % (render_expression(children[0])[0], node['name']), children[0]['type'] - if node.kind.name == 'CSTYLE_CAST_EXPR': + if node['node'] == 'CSTYLE_CAST_EXPR': children = list(node.get_children()) return render_expression(children[0]), 'unknown' - if node.kind.name == 'FIELD_DECL' or node.kind.name == 'VAR_DECL': - type = resolve_type(node.type.spelling) - name = node.spelling + if node['node'] == 'FieldDecl' or node['node'] == 'VarDecl': + type = resolve_type(node['type']) + name = node['name'].replace('used', '') prefix = '' - if node.kind.name == 'VAR_DECL': + if node['node'] == 'VarDecl': prefix = 'var ' suffix = '' - children = list(node.get_children()) - - # We must check the position of the child is at the end. Otherwise a - # child can refer to another expression like the size of the data type. - if len(children) > 0 and children[0].extent.end.column == node.extent.end.column: - e = render_expression(children[0]) - suffix = ' = %s' % cast(e[0], e[1], type) + if 'children' in node: + children = node['children'] + suffix = ' = %s' % render_expression(children[0])[0] return '%s%s %s%s' % (prefix, name, type, suffix), 'unknown' - if node.kind.name == 'PARM_DECL': + if node['node'] == 'PARM_DECL': return resolve_type(node.type.spelling), 'unknown' - return node.kind.name, 'unknown' + # return node['node'], 'unknown' - #raise Exception('render_expression: %s' % node.kind) + raise Exception('render_expression: %s' % node['node']) def print_children(node): print(len(list(node.get_children())), [t.spelling for t in node.get_tokens()]) for child in node.get_children(): print(child.kind.name, render_expression(child), [t.spelling for t in child.get_tokens()]) +def get_function_params(nodes): + if 'children' not in nodes: + return [] + + return [n for n in nodes['children'] if n['node'] == 'ParmVarDecl'] + def render(out, node, indent=0, return_type=None): - if node.kind.name == 'TRANSLATION_UNIT': - for c in node.get_children(): + if node['node'] == 'TranslationUnitDecl': + for c in node['children']: render(out, c, indent, return_type) return - if node.kind.name == 'FUNCTION_DECL': - function_name = node.spelling + if node['node'] == 'FunctionDecl': + function_name = node['name'] if function_name in ('__istype', '__isctype', '__wcwidth', '__sputc'): return has_body = False - for c in node.get_children(): - if c.kind.name == 'COMPOUND_STMT': - has_body = True + if 'children' in node: + for c in node['children']: + if c['node'] == 'CompoundStmt': + has_body = True args = [] - for a in node.get_arguments(): - args.append('%s %s' % (a.spelling, resolve_type(a.type.spelling))) + for a in get_function_params(node): + args.append('%s %s' % (a['name'], resolve_type(a['type']))) if has_body: - return_type = ' ' + node.result_type.spelling - if return_type == ' void': + return_type = ' ' + node['type'] + if return_type == ' void ()': return_type = '' if function_name == 'main': @@ -327,27 +312,27 @@ def render(out, node, indent=0, return_type=None): print_line(out, 'func %s(%s)%s {' % (function_name, ', '.join(args), return_type), indent) - for c in node.get_children(): - if c.kind.name == 'COMPOUND_STMT': - render(out, c, indent + 1, node.result_type.spelling) + for c in node['children']: + if c['node'] == 'CompoundStmt': + render(out, c, indent + 1, node['type']) print_line(out, '}\n', indent) - function_defs[node.spelling] = (node.result_type.spelling, [a.type.spelling for a in node.get_arguments()]) + function_defs[node['name']] = (node['type'], [a['type'] for a in get_function_params(node)]) return - if node.kind.name == 'PARM_DECL': - print_line(out, node.spelling, indent) - return + # if node['node'] == 'PARM_DECL': + # print_line(out, node.spelling, indent) + # return - if node.kind.name == 'COMPOUND_STMT': - for c in node.get_children(): + if node['node'] == 'CompoundStmt': + for c in node['children']: render(out, c, indent, return_type) return - if node.kind.name == 'IF_STMT': - children = list(node.get_children()) + if node['node'] == 'IfStmt': + children = node['children'] e = render_expression(children[0]) print_line(out, 'if %s {' % cast(e[0], e[1], 'bool'), indent) @@ -362,8 +347,8 @@ def render(out, node, indent=0, return_type=None): return - if node.kind.name == 'WHILE_STMT': - children = list(node.get_children()) + if node['node'] == 'WhileStmt': + children = node['children'] e = render_expression(children[0]) print_line(out, 'for %s {' % cast(e[0], e[1], 'bool'), indent) @@ -374,8 +359,8 @@ def render(out, node, indent=0, return_type=None): return - if node.kind.name == 'FOR_STMT': - children = list(node.get_children()) + if node['node'] == 'ForStmt': + children = node['children'] a, b, c = [render_expression(e)[0] for e in children[:3]] print_line(out, 'for %s; %s; %s {' % (a, b, c), indent) @@ -386,23 +371,15 @@ def render(out, node, indent=0, return_type=None): return - if node.kind.name == 'BREAK_STMT': + if node['node'] == 'BreakStmt': print_line(out, 'break', indent) return - if node.kind.name == 'UNARY_OPERATOR': - variable, operator = [t.spelling for t in list(node.get_tokens())[0:2]] - if operator == '++': - print_line(out, '%s += 1' % variable, indent) - #print_line(out, '%s = string(%s[1:])' % (variable, variable), indent) - return - - print_line(out, '%s%s' % (operator, variable), indent) + if node['node'] == 'UnaryOperator': + print_line(out, render_expression(node)[0], indent) return - #raise Exception('UNARY_OPERATOR: %s' % operator) - - if node.kind.name == 'RETURN_STMT': + if node['node'] == 'ReturnStmt': # try: # e = render_expression(list(node.get_children())[0]) # print_line(out, 'return %s' % cast(e[0], e[1], return_type), indent) @@ -411,11 +388,21 @@ def render(out, node, indent=0, return_type=None): return - if node.kind.name in ('BINARY_OPERATOR', 'INTEGER_LITERAL', 'CALL_EXPR'): + if node['node'] in ('BinaryOperator', 'INTEGER_LITERAL', 'CallExpr'): print_line(out, render_expression(node)[0], indent) return - if node.kind.name == 'TYPEDEF_DECL': + if node['node'] == 'TypedefDecl': + # FIXME: All of the logic here is just to avoid errors, it needs to be + # fixed up. + if 'struct' in node['type'] or 'union' in node['type']: + return + node['type'] = node['type'].replace('unsigned', '') + + print_line(out, "type %s %s\n" % (node['name'], resolve_type(node['type'])), indent) + # print(node) + return + tokens = [t.spelling for t in node.get_tokens()] if len(list(node.get_children())) == 0: print_line(out, "type %s %s\n" % (tokens[-2], resolve_type(' '.join(tokens[1:-2]))), indent) @@ -424,67 +411,78 @@ def render(out, node, indent=0, return_type=None): return - if node.kind.name == 'UNION_DECL' or node.kind.name == 'STRUCT_DECL': - tokens = [t.spelling for t in node.get_tokens()] - - struct_name = tokens[-1] - start_at = 2 - if struct_name == ';': - struct_name = tokens[1] - start_at = 3 - - if struct_name in ('__darwin_pthread_handler_rec', '_opaque_pthread_t', - '_RuneEntry', '_RuneRange', '_RuneCharClass', '_RuneLocale'): + if node['node'] == 'RecordDecl': + if node['kind'] == 'union': return - print_line(out, "type %s struct {" % struct_name, indent) - - for attribute in node.get_children(): - print_line(out, render_expression(attribute)[0], indent + 1) - # print(struct_name, render_expression(attribute)) - - # name = '' - # type = '' - # for token in tokens[start_at:-2]: - # if token == ';': - # print_line(out, '%s %s' % (name, resolve_type(type)), indent + 1) - # type = '' - # elif is_identifier(token): - # name = token - # else: - # type += ' ' + token - + print_line(out, "type %s %s {" % (node['name'], node['kind']), indent) + if 'children' in node: + for c in node['children']: + print_line(out, render_expression(c)[0], indent + 1) print_line(out, "}\n", indent) return - if node.kind.name == 'UNEXPOSED_DECL': - tokens = [t.spelling for t in node.get_tokens()] - print_line(out, '// ' + ' '.join(tokens[1:-2]), indent) - return - - if node.kind.name == 'DECL_STMT': - for child in node.get_children(): + #if node['node'] == 'UNION_DECL' or node['node'] == 'STRUCT_DECL': + # tokens = [t.spelling for t in node.get_tokens()] + + # struct_name = tokens[-1] + # start_at = 2 + # if struct_name == ';': + # struct_name = tokens[1] + # start_at = 3 + + # if struct_name in ('__darwin_pthread_handler_rec', '_opaque_pthread_t', + # '_RuneEntry', '_RuneRange', '_RuneCharClass', '_RuneLocale'): + # return + + # print_line(out, "type %s struct {" % struct_name, indent) + + # for attribute in node.get_children(): + # print_line(out, render_expression(attribute)[0], indent + 1) + # # print(struct_name, render_expression(attribute)) + + # # name = '' + # # type = '' + # # for token in tokens[start_at:-2]: + # # if token == ';': + # # print_line(out, '%s %s' % (name, resolve_type(type)), indent + 1) + # # type = '' + # # elif is_identifier(token): + # # name = token + # # else: + # # type += ' ' + token + + # print_line(out, "}\n", indent) + # return + + # if node['node'] == 'UNEXPOSED_DECL': + # tokens = [t.spelling for t in node.get_tokens()] + # print_line(out, '// ' + ' '.join(tokens[1:-2]), indent) + # return + + if node['node'] == 'DeclStmt': + for child in node['children']: print_line(out, render_expression(child)[0], indent) return - if node.kind.name == 'VAR_DECL': - tokens = [t.spelling for t in node.get_tokens()] - if tokens[0] == 'extern': - return + if node['node'] == 'VarDecl': + # tokens = [t.spelling for t in node.get_tokens()] + # if tokens[0] == 'extern': + # return - children = list(node.get_children()) - if len(children) > 0: - print_line(out, 'var %s %s = %s\n' % (tokens[2], tokens[1], render_expression(children[0])[0]), indent) - else: - print_line(out, 'var %s %s\n' % (tokens[2], tokens[1]), indent) + # children = list(node.get_children()) + # if len(children) > 0: + # print_line(out, 'var %s %s = %s\n' % (tokens[2], tokens[1], render_expression(children[0])[0]), indent) + # else: + # print_line(out, 'var %s %s\n' % (tokens[2], tokens[1]), indent) return - if node.kind.name == 'ENUM_DECL': - print_line(out, '// enum', indent) - return + # if node['node'] == 'ENUM_DECL': + # print_line(out, '// enum', indent) + # return - raise Exception(node.kind) + raise Exception(node['node']) # 1. Compile it first (checking for errors) c_file_path = sys.argv[1] @@ -497,24 +495,33 @@ def render(out, node, indent=0, return_type=None): with open(pp_file_path, 'wb') as pp_out: pp_out.write(pp) -# 3. Parse C and output Go -index = clang.cindex.Index.create() -tu = index.parse(pp_file_path) - -go_file_path = '%s.go' % c_file_path.split('/')[-1][:-2] -# go_out = sys.stdout -go_out = io.StringIO() -#with open(go_file_path, 'w') as go_out: -# print_line(go_out, "package main\n", 0) -#print_line(go_out, 'import ("fmt"; "os")\n', 0) -render(go_out, tu.cursor) - -print("package main\n") -print("import (") -for import_name in sorted(imports): - print('\t"%s"' % import_name) -print(")\n") -print(go_out.getvalue()) - -# 4. Compile the Go -#subprocess.call(["go", "run", "functions.go", go_file_path]) +# 3. Generate JSON from AST +ast_pp = subprocess.Popen(["clang", "-Xclang", "-ast-dump", "-fsyntax-only", pp_file_path], stdout=subprocess.PIPE) +pp = subprocess.Popen(["python", "ast2json.py"], stdin=ast_pp.stdout, stdout=subprocess.PIPE).communicate()[0] + +json_file_path = 'pp.json' +with open(json_file_path, 'w') as json_out: + json_out.write(pp) + +with open(json_file_path, 'r') as json_in: + # 3. Parse C and output Go + # index = clang.cindex.Index.create() + # tu = index.parse(pp_file_path) + + go_file_path = '%s.go' % c_file_path.split('/')[-1][:-2] + # go_out = sys.stdout + go_out = io.StringIO() + #with open(go_file_path, 'w') as go_out: + # print_line(go_out, "package main\n", 0) + #print_line(go_out, 'import ("fmt"; "os")\n', 0) + render(go_out, json.loads(json_in.read())[0]) + + print("package main\n") + print("import (") + for import_name in sorted(imports): + print('\t"%s"' % import_name) + print(")\n") + print(go_out.getvalue()) + + # 4. Compile the Go + #subprocess.call(["go", "run", "functions.go", go_file_path])