diff --git a/.gitignore b/.gitignore
index 99284e498..835db24c1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
/pp.c
/out.go
/a.out
+/pp.json
diff --git a/ast2json.py b/ast2json.py
new file mode 100644
index 000000000..3f8827153
--- /dev/null
+++ b/ast2json.py
@@ -0,0 +1,149 @@
+import sys
+import re
+import json
+
+# This script converts the output of clang AST into a JSON file.
+#
+# Usage:
+# clang -Xclang -ast-dump -fsyntax-only myfile.c | python ast2json.py
+#
+# Yes, there are many better ways to do this. However I chose this method
+# because:
+#
+# 1. I need to separate the clang AST from the c2go conversion process so that
+# the c2go program can ingest a reliable JSON file and not depend on clang or
+# its different versions at all.
+# 2. The clang API is not stable and trying to match up binaries with different
+# versions and operating systems can be tricky and brittle.
+# 3. This tool, in time, will become a better binary of some kind that produces
+# much the same JSON output (so minimal changes to c2go.py).
+# 4. I needed something quick and dirty to proof the complete toolchain and get
+# it working on different versions of clang and different operating systems
+# before we enough information to really standardise the process.
+
+regex = {
+ 'AlwaysInlineAttr': r"^ (?P
[0-9a-fx]+) <(?P.*)> always_inline",
+ 'ArraySubscriptExpr': r"^ (?P[0-9a-fx]+) <(?P.*)> '(?P.*?)' (?P.*)",
+ 'AsmLabelAttr': r"^ (?P[0-9a-fx]+) <(?P.*)> \"(?P.+)\"",
+ 'AvailabilityAttr': r"^ (?P[0-9a-fx]+) <(?P.*)> (?P\w+) (?P[\d.]+) (?P[\d.]+) (?P[\d.]+) (?P\".*?\"|\w+) (?P\".*?\"|\w+)",
+ 'BinaryOperator': r"^ (?P[0-9a-fx]+) <(?P.*)> '(?P.*?)' '(?P.*?)'",
+ 'BreakStmt': r"^ (?P[0-9a-fx]+) <(?P.*)>",
+ 'BuiltinType': r'^ (?P[0-9a-fx]+) \'(?P.*)\'',
+ 'CallExpr': r'^ (?P[0-9a-fx]+) <(?P.*)> \'(?P.*?)\'',
+ 'CharacterLiteral': r"^ (?P[0-9a-fx]+) <(?P.*)> '(?P.*?)' (?P\d+)",
+ 'CompoundStmt': r'^ (?P[0-9a-fx]+) <(?P.*)>',
+ 'ConstantArrayType': r'^ (?P[0-9a-fx]+) \'(?P.*)\' (?P\d+)',
+ 'CStyleCastExpr': r"^ (?P[0-9a-fx]+) <(?P.*)> '(?P.*?)' <(?P.*)>",
+ 'DeclRefExpr': r"^ (?P[0-9a-fx]+) <(?P.*)> '(?P.*?)'.*? (lvalue (?P\w+)|Function) (?P[0-9a-fx]+) '(?P.*?)' '(?P.*?)'",
+ 'DeclStmt': r"^ (?P[0-9a-fx]+) <(?P.*)>",
+ 'DeprecatedAttr': r"^ (?P[0-9a-fx]+) <(?P.*)> \"(?P.*?)\" \"(?P.*?)\"",
+ 'ElaboratedType': r'^ (?P[0-9a-fx]+) \'(?P.*)\' (?P.+)',
+ 'FieldDecl': r"^ (?P[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.*?)(?P\w+?) '(?P.+?)'",
+ 'FloatingLiteral': r"^ (?P[0-9a-fx]+) <(?P.*)> '(?P.*)' (?P.+)",
+ 'FormatAttr': r'^ (?P[0-9a-fx]+) <(?P.*)>(?P Implicit)? (?P\w+) (?P\d+) (?P\d+)',
+ 'ForStmt': r"^ (?P[0-9a-fx]+) <(?P.*)>",
+ 'FunctionDecl': r"^ (?P[0-9a-fx]+) (?Pprev [0-9a-fx]+)? ?<(?P.*)> (?P[^ ]+)(?P implicit)?(?P used)? (?P\w+) '(?P.*)'(?P extern)?",
+ 'IfStmt': r'^ (?P[0-9a-fx]+) <(?P.*)>',
+ 'ImplicitCastExpr': r'^ (?P[0-9a-fx]+) <(?P.*)> \'(?P.*)\' <(?P.*)>',
+ 'IntegerLiteral': r'^ (?P[0-9a-fx]+) <(?P.*)> \'(?P.*)\' (?P.+)',
+ 'MemberExpr': r"^ (?P[0-9a-fx]+) <(?P.*)> '(?P.*?)' (?P.*?)(?P\w+) (?P[0-9a-fx]+)",
+ 'ParenExpr': r'^ (?P[0-9a-fx]+) <(?P.*)> \'(?P.*?)\'',
+ 'ParmVarDecl': r"^ (?P[0-9a-fx]+) <(?P.*)> (?P.+?)(?P \w+)? '(?P.*?)'(?P:'.*?')?",
+ 'PointerType': r'^ (?P[0-9a-fx]+) \'(?P.*)\'',
+ 'Record': r'^ (?P[0-9a-fx]+) \'(?P.*)\'',
+ 'RecordDecl': r"^ (?P[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?Pstruct|union) (?P\w+)",
+ 'RecordType': r'^ (?P[0-9a-fx]+) \'(?P.*)\'',
+ 'ReturnStmt': r'^ (?P[0-9a-fx]+) <(?P.*)>',
+ 'StringLiteral': r'^ (?P[0-9a-fx]+) <(?P.*)> \'(?P.*)\'(?P lvalue)? (?P.*)',
+ 'TranslationUnitDecl': r'^ (?P[0-9a-fx]+)',
+ 'Typedef': r'^ (?P[0-9a-fx]+) \'(?P.*)\'',
+ 'TypedefDecl': r'^ (?P[0-9a-fx]+) <(?P.+?)> (?P|[^ ]+)(?P.*?) (?P\w+) \'(?P.*?)\'(?P:\'.*?\')?',
+ 'TypedefType': r'^ (?P[0-9a-fx]+) \'(?P.*)\' (?P.+)',
+ 'UnaryOperator': r"^ (?P[0-9a-fx]+) <(?P.*)> '(?P.*?)'(?P lvalue)?(?P prefix)?(?P postfix)? '(?P.*?)'",
+ 'VarDecl': r"^ (?P[0-9a-fx]+) <(?P.*)> (?P[^ ]+) (?P.+) '(?P.+?)'.*?(?P.*)",
+ 'WhileStmt': r"^ (?P[0-9a-fx]+) <(?P.*)>",
+}
+
+def build_tree(nodes, depth):
+ """Convert an array of nodes, each prefixed with a depth into a tree."""
+ if len(nodes) == 0:
+ return []
+
+ # Split the list into sections, treat each section as a a tree with its own
+ # root.
+ sections = []
+ for node in nodes:
+ if node[0] == depth:
+ sections.append([node])
+ else:
+ sections[-1].append(node)
+
+ results = []
+ for section in sections:
+ children = build_tree([n for n in section if n[0] > depth], depth + 1)
+ result = section[0][1]
+
+ if len(children) > 0:
+ result['children'] = children
+
+ results.append(result)
+
+ return results
+
+def read_ast():
+ stdin = sys.stdin.read()
+ uncolored = re.sub(r'\x1b\[[\d;]+m', '', stdin)
+ return uncolored.split("\n")
+
+def convert_lines_to_nodes(lines):
+ nodes = []
+ for line in lines:
+ if line.strip() == '':
+ continue
+
+ # This will need to be handled more gracefully... I'm not even sure
+ # what this means?
+ if '<<>>' in line:
+ continue
+
+ indent_and_type = re.search(r'^([|\- `]*)(\w+)', line)
+ if indent_and_type is None:
+ print("Can not understand line '%s'" % line)
+ sys.exit(1)
+
+ node_type = indent_and_type.group(2)
+ # if node_type == 'FieldDecl':
+ # print(line[offset:])
+
+ offset = len(indent_and_type.group(0))
+ try:
+ result = re.search(regex[node_type], line[offset:])
+ except KeyError:
+ print("There is no regex for '%s'." % node_type)
+ print("I will print out all the lines so a regex can be created:\n")
+
+ for line in lines:
+ s = re.search(r'^([|\- `]*)(\w+)', line)
+ if s is not None and node_type == s.group(2):
+ print(line[offset:])
+
+ sys.exit(1)
+
+ if result is None:
+ print("Can not understand line '%s'" % line)
+ sys.exit(1)
+
+ node = result.groupdict()
+
+ node['node'] = node_type
+
+ indent_level = len(indent_and_type.group(1)) / 2
+ nodes.append([indent_level, node])
+
+ return nodes
+
+lines = read_ast()
+nodes = convert_lines_to_nodes(lines)
+tree = build_tree(nodes, 0)
+
+print(json.dumps(tree, sort_keys=True, indent=2, separators=(',', ': ')))
diff --git a/c2go.py b/c2go.py
index 614c00f9a..159e0de77 100644
--- a/c2go.py
+++ b/c2go.py
@@ -2,10 +2,11 @@
# -*- coding: utf-8 -*-
import sys
-import clang.cindex
import pprint
import re
import subprocess
+import json
+
try:
import StringIO as io
except ImportError:
@@ -40,13 +41,14 @@ def is_identifier(w):
def resolve_type(s):
s = s.strip()
- if s == 'const char *' or s == 'const char*' or s == 'char *' or s == 'const char *restrict':
+ if s == 'const char *' or s == 'const char*' or s == 'char *' or \
+ s == 'const char *restrict' or s == 'const char *__restrict':
return 'string'
if s == 'float':
return 'float32'
- if s == 'void *':
+ if s == 'void *' or s == '__darwin_pthread_handler_rec *':
return 'interface{}'
if s == 'char':
@@ -61,7 +63,7 @@ def resolve_type(s):
if s == 'int' or s == '__darwin_ct_rune_t':
return s
- if s == 'long':
+ if s == 'long' or s == '__mbstate_t' or s == '__builtin_va_list':
return 'int64'
if s == 'long long':
@@ -91,6 +93,9 @@ def resolve_type(s):
if s == 'long int':
return 'int32'
+ if s == '__int128':
+ return 'int64'
+
if re.match('unsigned char \\[\\d+\\]', s):
return s[14:] + 'byte'
@@ -106,9 +111,12 @@ def resolve_type(s):
if '(*)' in s or s == '__sFILEX *' or s == 'fpos_t':
return "interface{}"
+ if '(' in s:
+ return 'interface{}'
+
return s
- raise Exception('Cannot resolve type "%s"' % s)
+ # raise Exception('Cannot resolve type "%s"' % s)
def cast(expr, from_type, to_type):
from_type = resolve_type(from_type)
@@ -133,15 +141,9 @@ def print_line(out, line, indent):
out.write('%s%s\n' % ('\t' * indent, line))
def render_expression(node):
- if node.kind.name == 'BINARY_OPERATOR':
- end_of_left = list(node.get_children())[0].extent.end.column
- operator = None
- for t in node.get_tokens():
- if t.extent.start.column >= end_of_left:
- operator = t.spelling
- break
-
- left, right = [render_expression(t)[0] for t in list(node.get_children())]
+ if node['node'] == 'BinaryOperator':
+ operator = node['operator']
+ left, right = [render_expression(t)[0] for t in node['children']]
return_type = 'bool'
if operator == '|' or operator == '&':
@@ -149,28 +151,16 @@ def render_expression(node):
return '%s %s %s' % (left, operator, right), return_type
- if node.kind.name == 'CONDITIONAL_OPERATOR':
+ if node['node'] == 'CONDITIONAL_OPERATOR':
a, b, c = [render_expression(t) for t in list(node.get_children())]
try:
return '__ternary(%s, %s, %s)' % (cast(a[0], 'bool'), b[0], c[0]), b[1]
except TypeError:
return '// CONDITIONAL_OPERATOR: %s' % ''.join([t.spelling for t in node.get_tokens()]), 'unknown'
- if node.kind.name == 'UNARY_OPERATOR':
- # print(children[2].kind.name)
-
- expr_start = list(node.get_children())[0].extent.start.column
- operator = None
- for t in node.get_tokens():
- if t.extent.start.column >= expr_start:
- break
-
- operator = t.spelling
-
- if operator is None:
- operator = '++'
-
- expr = render_expression(list(node.get_children())[0])
+ if node['node'] == 'UnaryOperator':
+ operator = node['operator']
+ expr = render_expression(node['children'][0])
if operator == '!':
return '%s(%s)' % ('__not_%s' % expr[1], expr[0]), expr[1]
@@ -189,16 +179,22 @@ def render_expression(node):
return '%s%s' % (operator, expr[0]), expr[1]
- if node.kind.name == 'UNEXPOSED_EXPR':
- children = list(node.get_children())
- if len(children) < 1:
- return '// UNEXPOSED_EXPR: %s' % ''.join([t.spelling for t in node.get_tokens()]), 'unknown'
+ if node['node'] in ('CHARACTER_LITERAL', 'StringLiteral', 'FloatingLiteral'):
+ return node['value'], 'const char*'
- # if len(children) > 1:
- # raise Exception('To many children!')
+ if node['node'] == 'IntegerLiteral':
+ literal = node['value']
+ if literal[-1] == 'L':
+ literal = '%s(%s)' % (resolve_type('long'), literal[:-1])
- e = render_expression(children[0])
- name = e[0]
+ return literal, 'int'
+
+ if node['node'] == 'PAREN_EXPR':
+ e = render_expression(list(node.get_children())[0])
+ return '(%s)' % e[0], e[1]
+
+ if node['node'] == 'DeclRefExpr':
+ name = node['name']
if name == 'argc':
name = 'len(os.Args)'
@@ -207,27 +203,13 @@ def render_expression(node):
name = 'os.Args'
add_import("os")
- return name, e[1]
-
- if node.kind.name in ('CHARACTER_LITERAL', 'STRING_LITERAL', 'FLOATING_LITERAL'):
- return list(node.get_tokens())[0].spelling, 'const char*'
+ return name, node['type']
- if node.kind.name == 'INTEGER_LITERAL':
- literal = list(node.get_tokens())[0].spelling
- if literal[-1] == 'L':
- literal = '%s(%s)' % (resolve_type('long'), literal[:-1])
-
- return literal, 'int'
+ if node['node'] == 'ImplicitCastExpr':
+ return render_expression(node['children'][0])
- if node.kind.name == 'PAREN_EXPR':
- e = render_expression(list(node.get_children())[0])
- return '(%s)' % e[0], e[1]
-
- if node.kind.name == 'DECL_REF_EXPR':
- return node.spelling, node.type.spelling
-
- if node.kind.name == 'CALL_EXPR':
- children = list(node.get_children())
+ if node['node'] == 'CallExpr':
+ children = node['children']
func_name = render_expression(children[0])[0]
func_def = function_defs[func_name]
@@ -251,74 +233,77 @@ def render_expression(node):
return '%s(%s)' % (func_name, ', '.join(args)), func_def[0]
- if node.kind.name == 'ARRAY_SUBSCRIPT_EXPR':
- children = list(node.get_children())
+ if node['node'] == 'ArraySubscriptExpr':
+ children = node['children']
return '%s[%s]' % (render_expression(children[0])[0],
render_expression(children[1])[0]), 'unknown'
- if node.kind.name == 'MEMBER_REF_EXPR':
- children = list(node.get_children())
- return '%s.%s' % (render_expression(children[0])[0], list(node.get_tokens())[-2].spelling), 'unknown'
+ if node['node'] == 'MemberExpr':
+ children = node['children']
+ return '%s.%s' % (render_expression(children[0])[0], node['name']), children[0]['type']
- if node.kind.name == 'CSTYLE_CAST_EXPR':
+ if node['node'] == 'CSTYLE_CAST_EXPR':
children = list(node.get_children())
return render_expression(children[0]), 'unknown'
- if node.kind.name == 'FIELD_DECL' or node.kind.name == 'VAR_DECL':
- type = resolve_type(node.type.spelling)
- name = node.spelling
+ if node['node'] == 'FieldDecl' or node['node'] == 'VarDecl':
+ type = resolve_type(node['type'])
+ name = node['name'].replace('used', '')
prefix = ''
- if node.kind.name == 'VAR_DECL':
+ if node['node'] == 'VarDecl':
prefix = 'var '
suffix = ''
- children = list(node.get_children())
-
- # We must check the position of the child is at the end. Otherwise a
- # child can refer to another expression like the size of the data type.
- if len(children) > 0 and children[0].extent.end.column == node.extent.end.column:
- e = render_expression(children[0])
- suffix = ' = %s' % cast(e[0], e[1], type)
+ if 'children' in node:
+ children = node['children']
+ suffix = ' = %s' % render_expression(children[0])[0]
return '%s%s %s%s' % (prefix, name, type, suffix), 'unknown'
- if node.kind.name == 'PARM_DECL':
+ if node['node'] == 'PARM_DECL':
return resolve_type(node.type.spelling), 'unknown'
- return node.kind.name, 'unknown'
+ # return node['node'], 'unknown'
- #raise Exception('render_expression: %s' % node.kind)
+ raise Exception('render_expression: %s' % node['node'])
def print_children(node):
print(len(list(node.get_children())), [t.spelling for t in node.get_tokens()])
for child in node.get_children():
print(child.kind.name, render_expression(child), [t.spelling for t in child.get_tokens()])
+def get_function_params(nodes):
+ if 'children' not in nodes:
+ return []
+
+ return [n for n in nodes['children'] if n['node'] == 'ParmVarDecl']
+
def render(out, node, indent=0, return_type=None):
- if node.kind.name == 'TRANSLATION_UNIT':
- for c in node.get_children():
+ if node['node'] == 'TranslationUnitDecl':
+ for c in node['children']:
render(out, c, indent, return_type)
return
- if node.kind.name == 'FUNCTION_DECL':
- function_name = node.spelling
+ if node['node'] == 'FunctionDecl':
+ function_name = node['name']
if function_name in ('__istype', '__isctype', '__wcwidth', '__sputc'):
return
has_body = False
- for c in node.get_children():
- if c.kind.name == 'COMPOUND_STMT':
- has_body = True
+ if 'children' in node:
+ for c in node['children']:
+ if c['node'] == 'CompoundStmt':
+ has_body = True
args = []
- for a in node.get_arguments():
- args.append('%s %s' % (a.spelling, resolve_type(a.type.spelling)))
+ for a in get_function_params(node):
+ args.append('%s %s' % (a['name'], resolve_type(a['type'])))
if has_body:
- return_type = ' ' + node.result_type.spelling
- if return_type == ' void':
+ return_type = ' ' + node['type']
+ if return_type == ' void ()':
return_type = ''
if function_name == 'main':
@@ -327,27 +312,27 @@ def render(out, node, indent=0, return_type=None):
print_line(out, 'func %s(%s)%s {' % (function_name,
', '.join(args), return_type), indent)
- for c in node.get_children():
- if c.kind.name == 'COMPOUND_STMT':
- render(out, c, indent + 1, node.result_type.spelling)
+ for c in node['children']:
+ if c['node'] == 'CompoundStmt':
+ render(out, c, indent + 1, node['type'])
print_line(out, '}\n', indent)
- function_defs[node.spelling] = (node.result_type.spelling, [a.type.spelling for a in node.get_arguments()])
+ function_defs[node['name']] = (node['type'], [a['type'] for a in get_function_params(node)])
return
- if node.kind.name == 'PARM_DECL':
- print_line(out, node.spelling, indent)
- return
+ # if node['node'] == 'PARM_DECL':
+ # print_line(out, node.spelling, indent)
+ # return
- if node.kind.name == 'COMPOUND_STMT':
- for c in node.get_children():
+ if node['node'] == 'CompoundStmt':
+ for c in node['children']:
render(out, c, indent, return_type)
return
- if node.kind.name == 'IF_STMT':
- children = list(node.get_children())
+ if node['node'] == 'IfStmt':
+ children = node['children']
e = render_expression(children[0])
print_line(out, 'if %s {' % cast(e[0], e[1], 'bool'), indent)
@@ -362,8 +347,8 @@ def render(out, node, indent=0, return_type=None):
return
- if node.kind.name == 'WHILE_STMT':
- children = list(node.get_children())
+ if node['node'] == 'WhileStmt':
+ children = node['children']
e = render_expression(children[0])
print_line(out, 'for %s {' % cast(e[0], e[1], 'bool'), indent)
@@ -374,8 +359,8 @@ def render(out, node, indent=0, return_type=None):
return
- if node.kind.name == 'FOR_STMT':
- children = list(node.get_children())
+ if node['node'] == 'ForStmt':
+ children = node['children']
a, b, c = [render_expression(e)[0] for e in children[:3]]
print_line(out, 'for %s; %s; %s {' % (a, b, c), indent)
@@ -386,23 +371,15 @@ def render(out, node, indent=0, return_type=None):
return
- if node.kind.name == 'BREAK_STMT':
+ if node['node'] == 'BreakStmt':
print_line(out, 'break', indent)
return
- if node.kind.name == 'UNARY_OPERATOR':
- variable, operator = [t.spelling for t in list(node.get_tokens())[0:2]]
- if operator == '++':
- print_line(out, '%s += 1' % variable, indent)
- #print_line(out, '%s = string(%s[1:])' % (variable, variable), indent)
- return
-
- print_line(out, '%s%s' % (operator, variable), indent)
+ if node['node'] == 'UnaryOperator':
+ print_line(out, render_expression(node)[0], indent)
return
- #raise Exception('UNARY_OPERATOR: %s' % operator)
-
- if node.kind.name == 'RETURN_STMT':
+ if node['node'] == 'ReturnStmt':
# try:
# e = render_expression(list(node.get_children())[0])
# print_line(out, 'return %s' % cast(e[0], e[1], return_type), indent)
@@ -411,11 +388,21 @@ def render(out, node, indent=0, return_type=None):
return
- if node.kind.name in ('BINARY_OPERATOR', 'INTEGER_LITERAL', 'CALL_EXPR'):
+ if node['node'] in ('BinaryOperator', 'INTEGER_LITERAL', 'CallExpr'):
print_line(out, render_expression(node)[0], indent)
return
- if node.kind.name == 'TYPEDEF_DECL':
+ if node['node'] == 'TypedefDecl':
+ # FIXME: All of the logic here is just to avoid errors, it needs to be
+ # fixed up.
+ if 'struct' in node['type'] or 'union' in node['type']:
+ return
+ node['type'] = node['type'].replace('unsigned', '')
+
+ print_line(out, "type %s %s\n" % (node['name'], resolve_type(node['type'])), indent)
+ # print(node)
+ return
+
tokens = [t.spelling for t in node.get_tokens()]
if len(list(node.get_children())) == 0:
print_line(out, "type %s %s\n" % (tokens[-2], resolve_type(' '.join(tokens[1:-2]))), indent)
@@ -424,67 +411,78 @@ def render(out, node, indent=0, return_type=None):
return
- if node.kind.name == 'UNION_DECL' or node.kind.name == 'STRUCT_DECL':
- tokens = [t.spelling for t in node.get_tokens()]
-
- struct_name = tokens[-1]
- start_at = 2
- if struct_name == ';':
- struct_name = tokens[1]
- start_at = 3
-
- if struct_name in ('__darwin_pthread_handler_rec', '_opaque_pthread_t',
- '_RuneEntry', '_RuneRange', '_RuneCharClass', '_RuneLocale'):
+ if node['node'] == 'RecordDecl':
+ if node['kind'] == 'union':
return
- print_line(out, "type %s struct {" % struct_name, indent)
-
- for attribute in node.get_children():
- print_line(out, render_expression(attribute)[0], indent + 1)
- # print(struct_name, render_expression(attribute))
-
- # name = ''
- # type = ''
- # for token in tokens[start_at:-2]:
- # if token == ';':
- # print_line(out, '%s %s' % (name, resolve_type(type)), indent + 1)
- # type = ''
- # elif is_identifier(token):
- # name = token
- # else:
- # type += ' ' + token
-
+ print_line(out, "type %s %s {" % (node['name'], node['kind']), indent)
+ if 'children' in node:
+ for c in node['children']:
+ print_line(out, render_expression(c)[0], indent + 1)
print_line(out, "}\n", indent)
return
- if node.kind.name == 'UNEXPOSED_DECL':
- tokens = [t.spelling for t in node.get_tokens()]
- print_line(out, '// ' + ' '.join(tokens[1:-2]), indent)
- return
-
- if node.kind.name == 'DECL_STMT':
- for child in node.get_children():
+ #if node['node'] == 'UNION_DECL' or node['node'] == 'STRUCT_DECL':
+ # tokens = [t.spelling for t in node.get_tokens()]
+
+ # struct_name = tokens[-1]
+ # start_at = 2
+ # if struct_name == ';':
+ # struct_name = tokens[1]
+ # start_at = 3
+
+ # if struct_name in ('__darwin_pthread_handler_rec', '_opaque_pthread_t',
+ # '_RuneEntry', '_RuneRange', '_RuneCharClass', '_RuneLocale'):
+ # return
+
+ # print_line(out, "type %s struct {" % struct_name, indent)
+
+ # for attribute in node.get_children():
+ # print_line(out, render_expression(attribute)[0], indent + 1)
+ # # print(struct_name, render_expression(attribute))
+
+ # # name = ''
+ # # type = ''
+ # # for token in tokens[start_at:-2]:
+ # # if token == ';':
+ # # print_line(out, '%s %s' % (name, resolve_type(type)), indent + 1)
+ # # type = ''
+ # # elif is_identifier(token):
+ # # name = token
+ # # else:
+ # # type += ' ' + token
+
+ # print_line(out, "}\n", indent)
+ # return
+
+ # if node['node'] == 'UNEXPOSED_DECL':
+ # tokens = [t.spelling for t in node.get_tokens()]
+ # print_line(out, '// ' + ' '.join(tokens[1:-2]), indent)
+ # return
+
+ if node['node'] == 'DeclStmt':
+ for child in node['children']:
print_line(out, render_expression(child)[0], indent)
return
- if node.kind.name == 'VAR_DECL':
- tokens = [t.spelling for t in node.get_tokens()]
- if tokens[0] == 'extern':
- return
+ if node['node'] == 'VarDecl':
+ # tokens = [t.spelling for t in node.get_tokens()]
+ # if tokens[0] == 'extern':
+ # return
- children = list(node.get_children())
- if len(children) > 0:
- print_line(out, 'var %s %s = %s\n' % (tokens[2], tokens[1], render_expression(children[0])[0]), indent)
- else:
- print_line(out, 'var %s %s\n' % (tokens[2], tokens[1]), indent)
+ # children = list(node.get_children())
+ # if len(children) > 0:
+ # print_line(out, 'var %s %s = %s\n' % (tokens[2], tokens[1], render_expression(children[0])[0]), indent)
+ # else:
+ # print_line(out, 'var %s %s\n' % (tokens[2], tokens[1]), indent)
return
- if node.kind.name == 'ENUM_DECL':
- print_line(out, '// enum', indent)
- return
+ # if node['node'] == 'ENUM_DECL':
+ # print_line(out, '// enum', indent)
+ # return
- raise Exception(node.kind)
+ raise Exception(node['node'])
# 1. Compile it first (checking for errors)
c_file_path = sys.argv[1]
@@ -497,24 +495,33 @@ def render(out, node, indent=0, return_type=None):
with open(pp_file_path, 'wb') as pp_out:
pp_out.write(pp)
-# 3. Parse C and output Go
-index = clang.cindex.Index.create()
-tu = index.parse(pp_file_path)
-
-go_file_path = '%s.go' % c_file_path.split('/')[-1][:-2]
-# go_out = sys.stdout
-go_out = io.StringIO()
-#with open(go_file_path, 'w') as go_out:
-# print_line(go_out, "package main\n", 0)
-#print_line(go_out, 'import ("fmt"; "os")\n', 0)
-render(go_out, tu.cursor)
-
-print("package main\n")
-print("import (")
-for import_name in sorted(imports):
- print('\t"%s"' % import_name)
-print(")\n")
-print(go_out.getvalue())
-
-# 4. Compile the Go
-#subprocess.call(["go", "run", "functions.go", go_file_path])
+# 3. Generate JSON from AST
+ast_pp = subprocess.Popen(["clang", "-Xclang", "-ast-dump", "-fsyntax-only", pp_file_path], stdout=subprocess.PIPE)
+pp = subprocess.Popen(["python", "ast2json.py"], stdin=ast_pp.stdout, stdout=subprocess.PIPE).communicate()[0]
+
+json_file_path = 'pp.json'
+with open(json_file_path, 'w') as json_out:
+ json_out.write(pp)
+
+with open(json_file_path, 'r') as json_in:
+ # 3. Parse C and output Go
+ # index = clang.cindex.Index.create()
+ # tu = index.parse(pp_file_path)
+
+ go_file_path = '%s.go' % c_file_path.split('/')[-1][:-2]
+ # go_out = sys.stdout
+ go_out = io.StringIO()
+ #with open(go_file_path, 'w') as go_out:
+ # print_line(go_out, "package main\n", 0)
+ #print_line(go_out, 'import ("fmt"; "os")\n', 0)
+ render(go_out, json.loads(json_in.read())[0])
+
+ print("package main\n")
+ print("import (")
+ for import_name in sorted(imports):
+ print('\t"%s"' % import_name)
+ print(")\n")
+ print(go_out.getvalue())
+
+ # 4. Compile the Go
+ #subprocess.call(["go", "run", "functions.go", go_file_path])