From ae1ae1a1e9fedbad841d6128dac9e03406b54e78 Mon Sep 17 00:00:00 2001
From: Elliot Chance <elliotchance@gmail.com>
Date: Fri, 24 Mar 2017 08:19:47 +1100
Subject: [PATCH 1/5] Added a new script for converting clang output AST to
 JSON

---
 .gitignore  |   1 +
 ast2json.py | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 143 insertions(+)
 create mode 100644 ast2json.py
diff --git a/.gitignore b/.gitignore
index 99284e498..835db24c1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 /pp.c
 /out.go
 /a.out
+/pp.json
diff --git a/ast2json.py b/ast2json.py
new file mode 100644
index 000000000..5aa3bd625
--- /dev/null
+++ b/ast2json.py
@@ -0,0 +1,142 @@
+import sys
+import re
+import json
+
+# This script converts the output of clang AST into a JSON file.
+# 
+# Usage:
+#   clang -Xclang -ast-dump -fsyntax-only myfile.c | python ast2json.py
+# 
+# Yes, there are many better ways to do this. However, I chose this method
+# because:
+# 
+# 1. I need to separate the clang AST from the c2go conversion process so that
+#    the c2go program can ingest a reliable JSON file and not depend on clang or
+#    its different versions at all.
+# 2. The clang API is not stable and trying to match up binaries with different
+#    versions and operating systems can be tricky and brittle.
+# 3. This tool, in time, will become a better binary of some kind that produces
+#    much the same JSON output (so minimal changes to c2go.py).
+# 4. I needed something quick and dirty to prove the complete toolchain and get
+#    it working on different versions of clang and different operating systems
+#    before we have enough information to really standardise the process.
+
+regex = {
+    'AlwaysInlineAttr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> always_inline",
+    'AsmLabelAttr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \"(?P<function>.+)\"",
+    'AvailabilityAttr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<os>\w+) (?P<version>[\d.]+) (?P<unknown1>[\d.]+) (?P<unknown2>[\d.]+) (?P<unknown3>\".*?\"|\w+) (?P<unknown4>\".*?\"|\w+)",
+    'TranslationUnitDecl': r'^ (?P<address>[0-9a-fx]+)',
+    'IntegerLiteral': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*)\' (?P<value>\d+)',
+    'TypedefDecl': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.+?)> (?P<position2><invalid sloc>|[^ ]+)(?P<tags>.*?) (?P<name>\w+) \'(?P<type>.*?)\'(?P<type2>:\'.*?\')?',
+    'BuiltinType': r'^ (?P<address>[0-9a-fx]+) \'(?P<name>.*)\'',
+    'ReturnStmt': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>',
+    'StringLiteral': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*)\' (?P<value>.*)',
+    'ImplicitCastExpr': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*)\' <(?P<kind>.*)>',
+    'DeclRefExpr': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*?)\' (?P<unknown>.*)',
+    'CallExpr': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*?)\'',
+    'ParenExpr': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*?)\'',
+    'CompoundStmt': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>',
+    'IfStmt': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>',
+    'FunctionDecl': r'^ (?P<address>[0-9a-fx]+) (?P<prev>prev [0-9a-fx]+)? ?<(?P<position1>.*)> (?P<position2>[^ ]+)(?P<tags1> implicit)?(?P<tags2> used)? (?P<name>\w+) \'(?P<type>.*)\'(?P<tags3> extern)?',
+    'ParmVarDecl': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>.+) \'(?P<type>.*?)\'(?P<type2>:\'.*?\')?',
+    'FormatAttr': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>(?P<tags> Implicit)? (?P<function>\w+) (?P<unknown1>\d+) (?P<unknown2>\d+)',
+    'RecordType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\'',
+    'Record': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\'',
+    'PointerType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\'',
+    'Typedef': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\'',
+    'ConstantArrayType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\' (?P<size>\d+)',
+    'RecordDecl': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>[^ ]+) (?P<type>.+)',
+    'FieldDecl': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>[^ ]+) (?P<type>.+)',
+    'ElaboratedType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\' (?P<tags>.+)',
+    'TypedefType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\' (?P<tags>.+)',
+    'VarDecl': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>[^ ]+) (?P<name>.+) '(?P<type>.+)'(?P<tags>.*)",
+    'DeprecatedAttr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \"(?P<message1>.*?)\" \"(?P<message2>.*?)\"",
+    'BinaryOperator': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)' '(?P<operator>.*?)'",
+    'MemberExpr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)' lvalue ->(?P<name>\w+) (?P<address2>[0-9a-fx]+)",
+    'CStyleCastExpr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)' <(?P<kind>.*)>",
+    'CharacterLiteral': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)' (?P<value>\d+)",
+    'UnaryOperator': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)'(?P<tags1> lvalue)?(?P<tags2> prefix)?(?P<tags3> postfix)? '(?P<operator>.*?)'",
+    'DeclStmt': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>",
+    'ForStmt': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>",
+    'BreakStmt': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>",
+}
+
+def build_tree(nodes, depth):
+    """Convert an array of nodes, each prefixed with a depth into a tree."""
+    if len(nodes) == 0:
+        return []
+
+    # Split the list into sections, treat each section as a tree with its own
+    # root.
+    sections = []
+    for node in nodes:
+        if node[0] == depth:
+            sections.append([node])
+        else:
+            sections[-1].append(node)
+
+    results = []
+    for section in sections:
+        children = build_tree([n for n in section if n[0] > depth], depth + 1)
+        result = section[0][1]
+
+        if len(children) > 0:
+            result['children'] = children
+
+        results.append(result)
+
+    return results
+
+def read_ast():
+    stdin = sys.stdin.read()
+    uncolored = re.sub(r'\x1b\[[\d;]+m', '', stdin)
+    return uncolored.split("\n")
+
+def convert_lines_to_nodes(lines):
+    nodes = []
+    for line in lines:
+        if line.strip() == '':
+            continue
+
+        # This will need to be handled more gracefully...  I'm not even sure
+        # what this means?
+        if '<<<NULL>>>' in line:
+            continue
+
+        indent_and_type = re.search(r'^([|\- `]*)(\w+)', line)
+        if indent_and_type is None:
+            print("Can not understand line '%s'" % line)
+            sys.exit(1)
+
+        offset = len(indent_and_type.group(0))
+        try:
+            result = re.search(regex[indent_and_type.group(2)], line[offset:])
+        except KeyError:
+            print("There is no regex for '%s'." % indent_and_type.group(2))
+            print("I will print out all the lines so a regex can be created:\n")
+
+            for line in lines:
+                s = re.search(r'^([|\- `]*)(\w+)', line)
+                if s is not None and indent_and_type.group(2) == s.group(2):
+                    print(line[offset:])
+
+            sys.exit(1)
+
+        if result is None:
+            print("Can not understand line '%s'" % line)
+            sys.exit(1)
+
+        node = result.groupdict()
+
+        node['node'] = indent_and_type.group(2)
+
+        indent_level = len(indent_and_type.group(1)) / 2
+        nodes.append([indent_level, node])
+
+    return nodes
+
+lines = read_ast()
+nodes = convert_lines_to_nodes(lines)
+tree = build_tree(nodes, 0)
+
+print(json.dumps(tree, sort_keys=True, indent=2, separators=(',', ': ')))

From ddcca04fd7b8b26e7497636978e0edd2ff5f379d Mon Sep 17 00:00:00 2001
From: Elliot Chance <elliotchance@gmail.com>
Date: Fri, 24 Mar 2017 08:21:20 +1100
Subject: [PATCH 2/5] Some refactoring of c2go to take in the new JSON

---
 c2go.py | 334 ++++++++++++++++++++++++++++++--------------------------
 1 file changed, 181 insertions(+), 153 deletions(-)

diff --git a/c2go.py b/c2go.py
index 47ed3c595..a2180c452 100644
--- a/c2go.py
+++ b/c2go.py
@@ -1,9 +1,9 @@
 import sys
-import clang.cindex
 import pprint
 import re
 import subprocess
 import StringIO
+import json
 
 function_defs = {
     '__istype': ('uint32', ('__darwin_ct_rune_t', 'uint32')),
@@ -127,7 +127,7 @@ def print_line(out, line, indent):
     out.write('%s%s\n' % ('\t' * indent, line))
 
 def render_expression(node):
-    if node.kind.name == 'BINARY_OPERATOR':
+    if node['node'] == 'BINARY_OPERATOR':
         end_of_left = list(node.get_children())[0].extent.end.column
         operator = None
         for t in node.get_tokens():
@@ -143,14 +143,14 @@ def render_expression(node):
 
         return '%s %s %s' % (left, operator, right), return_type
 
-    if node.kind.name == 'CONDITIONAL_OPERATOR':
+    if node['node'] == 'CONDITIONAL_OPERATOR':
         a, b, c = [render_expression(t) for t in list(node.get_children())]
         try:
             return '__ternary(%s, %s, %s)' % (cast(a[0], 'bool'), b[0], c[0]), b[1]
         except TypeError:
             return '// CONDITIONAL_OPERATOR: %s' % ''.join([t.spelling for t in node.get_tokens()]), 'unknown'
 
-    if node.kind.name == 'UNARY_OPERATOR':
+    if node['node'] == 'UNARY_OPERATOR':
         # print(children[2].kind.name)
 
         expr_start = list(node.get_children())[0].extent.start.column
@@ -183,7 +183,7 @@ def render_expression(node):
 
         return '%s%s' % (operator, expr[0]), expr[1]
 
-    if node.kind.name == 'UNEXPOSED_EXPR':
+    if node['node'] == 'UNEXPOSED_EXPR':
         children = list(node.get_children())
         if len(children) < 1:
             return '// UNEXPOSED_EXPR: %s' % ''.join([t.spelling for t in node.get_tokens()]), 'unknown'
@@ -203,25 +203,28 @@ def render_expression(node):
 
         return name, e[1]
 
-    if node.kind.name in ('CHARACTER_LITERAL', 'STRING_LITERAL', 'FLOATING_LITERAL'):
+    if node['node'] in ('CHARACTER_LITERAL', 'STRING_LITERAL', 'FLOATING_LITERAL'):
         return list(node.get_tokens())[0].spelling, 'const char*'
 
-    if node.kind.name == 'INTEGER_LITERAL':
+    if node['node'] == 'INTEGER_LITERAL':
         literal = list(node.get_tokens())[0].spelling
         if literal[-1] == 'L':
             literal = '%s(%s)' % (resolve_type('long'), literal[:-1])
 
         return literal, 'int'
 
-    if node.kind.name == 'PAREN_EXPR':
+    if node['node'] == 'PAREN_EXPR':
         e = render_expression(list(node.get_children())[0])
         return '(%s)' % e[0], e[1]
 
-    if node.kind.name == 'DECL_REF_EXPR':
-        return node.spelling, node.type.spelling
+    if node['node'] == 'DeclRefExpr':
+        return node['unknown'], node['type']
 
-    if node.kind.name == 'CALL_EXPR':
-        children = list(node.get_children())
+    if node['node'] == 'ImplicitCastExpr':
+        return render_expression(node['children'][0])
+
+    if node['node'] == 'CallExpr':
+        children = node['children']
         func_name = render_expression(children[0])[0]
 
         func_def = function_defs[func_name]
@@ -245,25 +248,25 @@ def render_expression(node):
 
         return '%s(%s)' % (func_name, ', '.join(args)), func_def[0]
 
-    if node.kind.name == 'ARRAY_SUBSCRIPT_EXPR':
+    if node['node'] == 'ARRAY_SUBSCRIPT_EXPR':
         children = list(node.get_children())
         return '%s[%s]' % (render_expression(children[0])[0],
             render_expression(children[1])[0]), 'unknown'
 
-    if node.kind.name == 'MEMBER_REF_EXPR':
+    if node['node'] == 'MEMBER_REF_EXPR':
         children = list(node.get_children())
         return '%s.%s' % (render_expression(children[0])[0], list(node.get_tokens())[-2].spelling), 'unknown'
 
-    if node.kind.name == 'CSTYLE_CAST_EXPR':
+    if node['node'] == 'CSTYLE_CAST_EXPR':
         children = list(node.get_children())
         return render_expression(children[0]), 'unknown'
 
-    if node.kind.name == 'FIELD_DECL' or node.kind.name == 'VAR_DECL':
+    if node['node'] == 'FIELD_DECL' or node['node'] == 'VAR_DECL':
         type = resolve_type(node.type.spelling)
         name = node.spelling
 
         prefix = ''
-        if node.kind.name == 'VAR_DECL':
+        if node['node'] == 'VAR_DECL':
             prefix = 'var '
 
         suffix = ''
@@ -277,10 +280,10 @@ def render_expression(node):
 
         return '%s%s %s%s' % (prefix, name, type, suffix), 'unknown'
 
-    if node.kind.name == 'PARM_DECL':
+    if node['node'] == 'PARM_DECL':
         return resolve_type(node.type.spelling), 'unknown'
 
-    return node.kind.name, 'unknown'
+    return node['node'], 'unknown'
 
     #raise Exception('render_expression: %s' % node.kind)
 
@@ -289,29 +292,38 @@ def print_children(node):
     for child in node.get_children():
         print(child.kind.name, render_expression(child), [t.spelling for t in child.get_tokens()])
 
+def get_function_params(nodes):
+    if 'children' not in nodes:
+        return []
+
+    return [n for n in nodes['children'] if n['node'] == 'ParmVarDecl']
+
 def render(out, node, indent=0, return_type=None):
-    if node.kind.name == 'TRANSLATION_UNIT':
-        for c in node.get_children():
+    if node['node'] == 'TranslationUnitDecl':
+        for c in node['children']:
             render(out, c, indent, return_type)
         return
 
-    if node.kind.name == 'FUNCTION_DECL':
-        function_name = node.spelling
+    if node['node'] == 'FunctionDecl':
+        function_name = node['name']
 
         if function_name in ('__istype', '__isctype', '__wcwidth', '__sputc'):
             return
 
         has_body = False
-        for c in node.get_children():
-            if c.kind.name == 'COMPOUND_STMT':
-                has_body = True
+        if 'children' in node:
+            for c in node['children']:
+                if c['node'] == 'CompoundStmt':
+                    has_body = True
+            # print(function_name)
+            # print(json.dumps(node['children']))
 
         args = []
-        for a in node.get_arguments():
-            args.append('%s %s' % (a.spelling, resolve_type(a.type.spelling)))
+        # for a in get_function_params(node):
+        #     args.append('%s %s' % (a['name'], resolve_type(a['type'])))
 
         if has_body:
-            return_type = ' ' + node.result_type.spelling
+            return_type = ' ' + node['type']
             if return_type == ' void':
                 return_type = ''
 
@@ -321,82 +333,82 @@ def render(out, node, indent=0, return_type=None):
                 print_line(out, 'func %s(%s)%s {' % (function_name,
                     ', '.join(args), return_type), indent)
             
-            for c in node.get_children():
-                if c.kind.name == 'COMPOUND_STMT':
-                    render(out, c, indent + 1, node.result_type.spelling)
+            for c in node['children']:
+                if c['node'] == 'CompoundStmt':
+                    render(out, c, indent + 1, node['type'])
 
             print_line(out, '}\n', indent)
 
-        function_defs[node.spelling] = (node.result_type.spelling, [a.type.spelling for a in node.get_arguments()])
+    #     function_defs[node.spelling] = (node.result_type.spelling, [a.type.spelling for a in node.get_arguments()])
 
         return
 
-    if node.kind.name == 'PARM_DECL':
-        print_line(out, node.spelling, indent)
-        return
+    # if node['node'] == 'PARM_DECL':
+    #     print_line(out, node.spelling, indent)
+    #     return
 
-    if node.kind.name == 'COMPOUND_STMT':
-        for c in node.get_children():
+    if node['node'] == 'CompoundStmt':
+        for c in node['children']:
             render(out, c, indent, return_type)
         return
 
-    if node.kind.name == 'IF_STMT':
-        children = list(node.get_children())
+    # if node['node'] == 'IF_STMT':
+    #     children = list(node.get_children())
 
-        e = render_expression(children[0])
-        print_line(out, 'if %s {' % cast(e[0], e[1], 'bool'), indent)
+    #     e = render_expression(children[0])
+    #     print_line(out, 'if %s {' % cast(e[0], e[1], 'bool'), indent)
 
-        render(out, children[1], indent + 1, return_type)
+    #     render(out, children[1], indent + 1, return_type)
 
-        if len(children) > 2:
-            print_line(out, '} else {', indent)
-            render(out, children[2], indent + 1, return_type)
+    #     if len(children) > 2:
+    #         print_line(out, '} else {', indent)
+    #         render(out, children[2], indent + 1, return_type)
 
-        print_line(out, '}', indent)
+    #     print_line(out, '}', indent)
 
-        return
+        # return
 
-    if node.kind.name == 'WHILE_STMT':
-        children = list(node.get_children())
+    # if node['node'] == 'WHILE_STMT':
+    #     children = list(node.get_children())
 
-        e = render_expression(children[0])
-        print_line(out, 'for %s {' % cast(e[0], e[1], 'bool'), indent)
+    #     e = render_expression(children[0])
+    #     print_line(out, 'for %s {' % cast(e[0], e[1], 'bool'), indent)
 
-        render(out, children[1], indent + 1, return_type)
+    #     render(out, children[1], indent + 1, return_type)
 
-        print_line(out, '}', indent)
+    #     print_line(out, '}', indent)
 
-        return
+    #     return
 
-    if node.kind.name == 'FOR_STMT':
-        children = list(node.get_children())
+    # if node['node'] == 'FOR_STMT':
+    #     children = list(node.get_children())
 
-        a, b, c = [render_expression(e)[0] for e in children[:3]]
-        print_line(out, 'for %s; %s; %s {' % (a, b, c), indent)
+    #     a, b, c = [render_expression(e)[0] for e in children[:3]]
+    #     print_line(out, 'for %s; %s; %s {' % (a, b, c), indent)
 
-        render(out, children[3], indent + 1, return_type)
+    #     render(out, children[3], indent + 1, return_type)
 
-        print_line(out, '}', indent)
+    #     print_line(out, '}', indent)
 
-        return
+    #     return
 
-    if node.kind.name == 'BREAK_STMT':
-        print_line(out, 'break', indent)
-        return
+    # if node['node'] == 'BREAK_STMT':
+    #     print_line(out, 'break', indent)
+    #     return
 
-    if node.kind.name == 'UNARY_OPERATOR':
-        variable, operator = [t.spelling for t in list(node.get_tokens())[0:2]]
-        if operator == '++':
-            print_line(out, '%s += 1' % variable, indent)
-            #print_line(out, '%s = string(%s[1:])' % (variable, variable), indent)
-            return
+    # if node['node'] == 'UNARY_OPERATOR':
+    #     variable, operator = [t.spelling for t in list(node.get_tokens())[0:2]]
+    #     if operator == '++':
+    #         print_line(out, '%s += 1' % variable, indent)
+    #         #print_line(out, '%s = string(%s[1:])' % (variable, variable), indent)
+    #         return
 
-        print_line(out, '%s%s' % (operator, variable), indent)
-        return
+    #     print_line(out, '%s%s' % (operator, variable), indent)
+    #     return
 
-        #raise Exception('UNARY_OPERATOR: %s' % operator)
+    #     #raise Exception('UNARY_OPERATOR: %s' % operator)
 
-    if node.kind.name == 'RETURN_STMT':
+    if node['node'] == 'ReturnStmt':
         # try:
         #     e = render_expression(list(node.get_children())[0])
         #     print_line(out, 'return %s' % cast(e[0], e[1], return_type), indent)
@@ -405,11 +417,15 @@ def render(out, node, indent=0, return_type=None):
         
         return
 
-    if node.kind.name in ('BINARY_OPERATOR', 'INTEGER_LITERAL', 'CALL_EXPR'):
+    if node['node'] in ('BINARY_OPERATOR', 'INTEGER_LITERAL', 'CallExpr'):
         print_line(out, render_expression(node)[0], indent)
         return
 
-    if node.kind.name == 'TYPEDEF_DECL':
+    if node['node'] == 'TypedefDecl':
+        print_line(out, "type %s %s\n" % (node['type'], node['name']), indent)
+        # print(node)
+        return
+
         tokens = [t.spelling for t in node.get_tokens()]
         if len(list(node.get_children())) == 0:
             print_line(out, "type %s %s\n" % (tokens[-2], resolve_type(' '.join(tokens[1:-2]))), indent)
@@ -418,67 +434,70 @@ def render(out, node, indent=0, return_type=None):
 
         return
 
-    if node.kind.name == 'UNION_DECL' or node.kind.name == 'STRUCT_DECL':
-        tokens = [t.spelling for t in node.get_tokens()]
-
-        struct_name = tokens[-1]
-        start_at = 2
-        if struct_name == ';':
-            struct_name = tokens[1]
-            start_at = 3
-
-        if struct_name in ('__darwin_pthread_handler_rec', '_opaque_pthread_t',
-            '_RuneEntry', '_RuneRange', '_RuneCharClass', '_RuneLocale'):
-            return
-
-        print_line(out, "type %s struct {" % struct_name, indent)
-
-        for attribute in node.get_children():
-            print_line(out, render_expression(attribute)[0], indent + 1)
-            # print(struct_name, render_expression(attribute))
-
-        # name = ''
-        # type = ''
-        # for token in tokens[start_at:-2]:
-        #     if token == ';':
-        #         print_line(out, '%s %s' % (name, resolve_type(type)), indent + 1)
-        #         type = ''
-        #     elif is_identifier(token):
-        #         name = token
-        #     else:
-        #         type += ' ' + token
-
-        print_line(out, "}\n", indent)
-        return
-
-    if node.kind.name == 'UNEXPOSED_DECL':
-        tokens = [t.spelling for t in node.get_tokens()]
-        print_line(out, '// ' + ' '.join(tokens[1:-2]), indent)
+    if node['node'] == 'RecordDecl':
         return
 
-    if node.kind.name == 'DECL_STMT':
-        for child in node.get_children():
-            print_line(out, render_expression(child)[0], indent)
-        return
-
-    if node.kind.name == 'VAR_DECL':
-        tokens = [t.spelling for t in node.get_tokens()]
-        if tokens[0] == 'extern':
-            return
-
-        children = list(node.get_children())
-        if len(children) > 0:
-            print_line(out, 'var %s %s = %s\n' % (tokens[2], tokens[1], render_expression(children[0])[0]), indent)
-        else:
-            print_line(out, 'var %s %s\n' % (tokens[2], tokens[1]), indent)
+    #if node['node'] == 'UNION_DECL' or node['node'] == 'STRUCT_DECL':
+    #     tokens = [t.spelling for t in node.get_tokens()]
+
+    #     struct_name = tokens[-1]
+    #     start_at = 2
+    #     if struct_name == ';':
+    #         struct_name = tokens[1]
+    #         start_at = 3
+
+    #     if struct_name in ('__darwin_pthread_handler_rec', '_opaque_pthread_t',
+    #         '_RuneEntry', '_RuneRange', '_RuneCharClass', '_RuneLocale'):
+    #         return
+
+    #     print_line(out, "type %s struct {" % struct_name, indent)
+
+    #     for attribute in node.get_children():
+    #         print_line(out, render_expression(attribute)[0], indent + 1)
+    #         # print(struct_name, render_expression(attribute))
+
+    #     # name = ''
+    #     # type = ''
+    #     # for token in tokens[start_at:-2]:
+    #     #     if token == ';':
+    #     #         print_line(out, '%s %s' % (name, resolve_type(type)), indent + 1)
+    #     #         type = ''
+    #     #     elif is_identifier(token):
+    #     #         name = token
+    #     #     else:
+    #     #         type += ' ' + token
+
+    #     print_line(out, "}\n", indent)
+    #     return
+
+    # if node['node'] == 'UNEXPOSED_DECL':
+    #     tokens = [t.spelling for t in node.get_tokens()]
+    #     print_line(out, '// ' + ' '.join(tokens[1:-2]), indent)
+    #     return
+
+    # if node['node'] == 'DECL_STMT':
+    #     for child in node.get_children():
+    #         print_line(out, render_expression(child)[0], indent)
+    #     return
+
+    if node['node'] == 'VarDecl':
+    #     tokens = [t.spelling for t in node.get_tokens()]
+    #     if tokens[0] == 'extern':
+    #         return
+
+    #     children = list(node.get_children())
+    #     if len(children) > 0:
+    #         print_line(out, 'var %s %s = %s\n' % (tokens[2], tokens[1], render_expression(children[0])[0]), indent)
+    #     else:
+    #         print_line(out, 'var %s %s\n' % (tokens[2], tokens[1]), indent)
         
         return
 
-    if node.kind.name == 'ENUM_DECL':
-        print_line(out, '// enum', indent)
-        return
+    # if node['node'] == 'ENUM_DECL':
+    #     print_line(out, '// enum', indent)
+    #     return
 
-    raise Exception(node.kind)
+    raise Exception(node['node'])
 
 # 1. Compile it first (checking for errors)
 c_file_path = sys.argv[1]
@@ -491,24 +510,33 @@ def render(out, node, indent=0, return_type=None):
 with open(pp_file_path, 'w') as pp_out:
     pp_out.write(pp)
 
-# 3. Parse C and output Go
-index = clang.cindex.Index.create()
-tu = index.parse(pp_file_path)
-
-go_file_path = '%s.go' % c_file_path.split('/')[-1][:-2]
-# go_out = sys.stdout
-go_out = StringIO.StringIO()
-#with open(go_file_path, 'w') as go_out:
-# print_line(go_out, "package main\n", 0)
-#print_line(go_out, 'import ("fmt"; "os")\n', 0)
-render(go_out, tu.cursor)
-
-print("package main\n")
-print("import (")
-for import_name in sorted(imports):
-    print('\t"%s"' % import_name)
-print(")\n")
-print(go_out.getvalue())
-
-# 4. Compile the Go
-#subprocess.call(["go", "run", "functions.go", go_file_path])
+# 3. Generate JSON from AST
+ast_pp = subprocess.Popen(["clang", "-Xclang", "-ast-dump", "-fsyntax-only", pp_file_path], stdout=subprocess.PIPE)
+pp = subprocess.Popen(["python", "ast2json.py"], stdin=ast_pp.stdout, stdout=subprocess.PIPE).communicate()[0]
+
+json_file_path = 'pp.json'
+with open(json_file_path, 'w') as json_out:
+    json_out.write(pp)
+
+with open(json_file_path, 'r') as json_in:
+    # 3. Parse C and output Go
+    # index = clang.cindex.Index.create()
+    # tu = index.parse(pp_file_path)
+
+    go_file_path = '%s.go' % c_file_path.split('/')[-1][:-2]
+    # go_out = sys.stdout
+    go_out = StringIO.StringIO()
+    #with open(go_file_path, 'w') as go_out:
+    # print_line(go_out, "package main\n", 0)
+    #print_line(go_out, 'import ("fmt"; "os")\n', 0)
+    render(go_out, json.loads(json_in.read())[0])
+
+    print("package main\n")
+    print("import (")
+    for import_name in sorted(imports):
+        print('\t"%s"' % import_name)
+    print(")\n")
+    print(go_out.getvalue())
+
+    # 4. Compile the Go
+    #subprocess.call(["go", "run", "functions.go", go_file_path])

From ca3c8568fe89703ecc5de9a3df957a32177d8704 Mon Sep 17 00:00:00 2001
From: Elliot Chance <elliotchance@gmail.com>
Date: Fri, 24 Mar 2017 10:32:48 +1100
Subject: [PATCH 3/5] hello-world.c is now working with the new JSON

---
 ast2json.py | 62 ++++++++++++++++++++++++++++-------------------------
 c2go.py     | 21 ++++++++----------
 2 files changed, 42 insertions(+), 41 deletions(-)

diff --git a/ast2json.py b/ast2json.py
index 5aa3bd625..bebaf8add 100644
--- a/ast2json.py
+++ b/ast2json.py
@@ -25,40 +25,40 @@
     'AlwaysInlineAttr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> always_inline",
     'AsmLabelAttr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \"(?P<function>.+)\"",
     'AvailabilityAttr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<os>\w+) (?P<version>[\d.]+) (?P<unknown1>[\d.]+) (?P<unknown2>[\d.]+) (?P<unknown3>\".*?\"|\w+) (?P<unknown4>\".*?\"|\w+)",
-    'TranslationUnitDecl': r'^ (?P<address>[0-9a-fx]+)',
-    'IntegerLiteral': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*)\' (?P<value>\d+)',
-    'TypedefDecl': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.+?)> (?P<position2><invalid sloc>|[^ ]+)(?P<tags>.*?) (?P<name>\w+) \'(?P<type>.*?)\'(?P<type2>:\'.*?\')?',
+    'BinaryOperator': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)' '(?P<operator>.*?)'",
+    'BreakStmt': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>",
     'BuiltinType': r'^ (?P<address>[0-9a-fx]+) \'(?P<name>.*)\'',
-    'ReturnStmt': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>',
-    'StringLiteral': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*)\' (?P<value>.*)',
-    'ImplicitCastExpr': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*)\' <(?P<kind>.*)>',
-    'DeclRefExpr': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*?)\' (?P<unknown>.*)',
     'CallExpr': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*?)\'',
-    'ParenExpr': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*?)\'',
+    'CharacterLiteral': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)' (?P<value>\d+)",
     'CompoundStmt': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>',
+    'ConstantArrayType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\' (?P<size>\d+)',
+    'CStyleCastExpr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)' <(?P<kind>.*)>",
+    'DeclRefExpr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)' (lvalue (?P<kind>\w+)|Function) (?P<address2>[0-9a-fx]+) '(?P<name>.*?)' '(?P<type2>.*?)'",
+    'DeclStmt': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>",
+    'DeprecatedAttr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \"(?P<message1>.*?)\" \"(?P<message2>.*?)\"",
+    'ElaboratedType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\' (?P<tags>.+)',
+    'FieldDecl': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>[^ ]+) (?P<type>.+)',
+    'FormatAttr': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>(?P<tags> Implicit)? (?P<function>\w+) (?P<unknown1>\d+) (?P<unknown2>\d+)',
+    'ForStmt': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>",
+    'FunctionDecl': r"^ (?P<address>[0-9a-fx]+) (?P<prev>prev [0-9a-fx]+)? ?<(?P<position1>.*)> (?P<position2>[^ ]+)(?P<tags1> implicit)?(?P<tags2> used)? (?P<name>\w+) '(?P<type>.*)'(?P<tags3> extern)?",
     'IfStmt': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>',
-    'FunctionDecl': r'^ (?P<address>[0-9a-fx]+) (?P<prev>prev [0-9a-fx]+)? ?<(?P<position1>.*)> (?P<position2>[^ ]+)(?P<tags1> implicit)?(?P<tags2> used)? (?P<name>\w+) \'(?P<type>.*)\'(?P<tags3> extern)?',
+    'ImplicitCastExpr': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*)\' <(?P<kind>.*)>',
+    'IntegerLiteral': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*)\' (?P<value>\d+)',
+    'MemberExpr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)' lvalue ->(?P<name>\w+) (?P<address2>[0-9a-fx]+)",
+    'ParenExpr': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*?)\'',
     'ParmVarDecl': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>.+) \'(?P<type>.*?)\'(?P<type2>:\'.*?\')?',
-    'FormatAttr': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>(?P<tags> Implicit)? (?P<function>\w+) (?P<unknown1>\d+) (?P<unknown2>\d+)',
-    'RecordType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\'',
-    'Record': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\'',
     'PointerType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\'',
-    'Typedef': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\'',
-    'ConstantArrayType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\' (?P<size>\d+)',
+    'Record': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\'',
     'RecordDecl': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>[^ ]+) (?P<type>.+)',
-    'FieldDecl': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>[^ ]+) (?P<type>.+)',
-    'ElaboratedType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\' (?P<tags>.+)',
+    'RecordType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\'',
+    'ReturnStmt': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>',
+    'StringLiteral': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*)\'(?P<tags> lvalue)? (?P<value>.*)',
+    'TranslationUnitDecl': r'^ (?P<address>[0-9a-fx]+)',
+    'Typedef': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\'',
+    'TypedefDecl': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.+?)> (?P<position2><invalid sloc>|[^ ]+)(?P<tags>.*?) (?P<name>\w+) \'(?P<type>.*?)\'(?P<type2>:\'.*?\')?',
     'TypedefType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\' (?P<tags>.+)',
-    'VarDecl': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>[^ ]+) (?P<name>.+) '(?P<type>.+)'(?P<tags>.*)",
-    'DeprecatedAttr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \"(?P<message1>.*?)\" \"(?P<message2>.*?)\"",
-    'BinaryOperator': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)' '(?P<operator>.*?)'",
-    'MemberExpr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)' lvalue ->(?P<name>\w+) (?P<address2>[0-9a-fx]+)",
-    'CStyleCastExpr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)' <(?P<kind>.*)>",
-    'CharacterLiteral': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)' (?P<value>\d+)",
     'UnaryOperator': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)'(?P<tags1> lvalue)?(?P<tags2> prefix)?(?P<tags3> postfix)? '(?P<operator>.*?)'",
-    'DeclStmt': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>",
-    'ForStmt': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>",
-    'BreakStmt': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>",
+    'VarDecl': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>[^ ]+) (?P<name>.+) '(?P<type>.+)'(?P<tags>.*)",
 }
 
 def build_tree(nodes, depth):
@@ -108,16 +108,20 @@ def convert_lines_to_nodes(lines):
             print("Can not understand line '%s'" % line)
             sys.exit(1)
 
+        node_type = indent_and_type.group(2)
+        # if node_type == 'DeclRefExpr':
+        #     print(line[offset:])
+
         offset = len(indent_and_type.group(0))
         try:
-            result = re.search(regex[indent_and_type.group(2)], line[offset:])
+            result = re.search(regex[node_type], line[offset:])
         except KeyError:
-            print("There is no regex for '%s'." % indent_and_type.group(2))
+            print("There is no regex for '%s'." % node_type)
             print("I will print out all the lines so a regex can be created:\n")
 
             for line in lines:
                 s = re.search(r'^([|\- `]*)(\w+)', line)
-                if s is not None and indent_and_type.group(2) == s.group(2):
+                if s is not None and node_type == s.group(2):
                     print(line[offset:])
 
             sys.exit(1)
@@ -128,7 +132,7 @@ def convert_lines_to_nodes(lines):
 
         node = result.groupdict()
 
-        node['node'] = indent_and_type.group(2)
+        node['node'] = node_type
 
         indent_level = len(indent_and_type.group(1)) / 2
         nodes.append([indent_level, node])
diff --git a/c2go.py b/c2go.py
index a2180c452..7c148e494 100644
--- a/c2go.py
+++ b/c2go.py
@@ -34,7 +34,8 @@ def is_identifier(w):
 def resolve_type(s):
     s = s.strip()
 
-    if s == 'const char *' or s == 'const char*' or s == 'char *' or s == 'const char *restrict':
+    if s == 'const char *' or s == 'const char*' or s == 'char *' or \
+        s == 'const char *restrict' or s == 'const char *__restrict':
         return 'string'
 
     if s == 'float':
@@ -100,7 +101,7 @@ def resolve_type(s):
     if '(*)' in s or s == '__sFILEX *' or s == 'fpos_t':
         return "interface{}"
 
-    return s
+    # return s
 
     raise Exception('Cannot resolve type "%s"' % s)
 
@@ -151,8 +152,6 @@ def render_expression(node):
             return '// CONDITIONAL_OPERATOR: %s' % ''.join([t.spelling for t in node.get_tokens()]), 'unknown'
 
     if node['node'] == 'UNARY_OPERATOR':
-        # print(children[2].kind.name)
-
         expr_start = list(node.get_children())[0].extent.start.column
         operator = None
         for t in node.get_tokens():
@@ -203,8 +202,8 @@ def render_expression(node):
 
         return name, e[1]
 
-    if node['node'] in ('CHARACTER_LITERAL', 'STRING_LITERAL', 'FLOATING_LITERAL'):
-        return list(node.get_tokens())[0].spelling, 'const char*'
+    if node['node'] in ('CHARACTER_LITERAL', 'StringLiteral', 'FLOATING_LITERAL'):
+        return node['value'], 'const char*'
 
     if node['node'] == 'INTEGER_LITERAL':
         literal = list(node.get_tokens())[0].spelling
@@ -218,7 +217,7 @@ def render_expression(node):
         return '(%s)' % e[0], e[1]
 
     if node['node'] == 'DeclRefExpr':
-        return node['unknown'], node['type']
+        return node['name'], node['type']
 
     if node['node'] == 'ImplicitCastExpr':
         return render_expression(node['children'][0])
@@ -283,9 +282,9 @@ def render_expression(node):
     if node['node'] == 'PARM_DECL':
         return resolve_type(node.type.spelling), 'unknown'
 
-    return node['node'], 'unknown'
+    # return node['node'], 'unknown'
 
-    #raise Exception('render_expression: %s' % node.kind)
+    raise Exception('render_expression: %s' % node['node'])
 
 def print_children(node):
     print(len(list(node.get_children())), [t.spelling for t in node.get_tokens()])
@@ -315,8 +314,6 @@ def render(out, node, indent=0, return_type=None):
             for c in node['children']:
                 if c['node'] == 'CompoundStmt':
                     has_body = True
-            # print(function_name)
-            # print(json.dumps(node['children']))
 
         args = []
         # for a in get_function_params(node):
@@ -339,7 +336,7 @@ def render(out, node, indent=0, return_type=None):
 
             print_line(out, '}\n', indent)
 
-    #     function_defs[node.spelling] = (node.result_type.spelling, [a.type.spelling for a in node.get_arguments()])
+        function_defs[node['name']] = (node['type'], [a['type'] for a in get_function_params(node)])
 
         return
 

From a33b4eb146bf74f3fb0810f0eb52e3a76832320a Mon Sep 17 00:00:00 2001
From: Elliot Chance <elliotchance@gmail.com>
Date: Sat, 25 Mar 2017 12:07:56 +1100
Subject: [PATCH 4/5] argv.c array.c comments.c fib.c now working

---
 ast2json.py |   9 ++--
 c2go.py     | 118 ++++++++++++++++++++++++++--------------------------
 2 files changed, 66 insertions(+), 61 deletions(-)

diff --git a/ast2json.py b/ast2json.py
index bebaf8add..b7e6f23c3 100644
--- a/ast2json.py
+++ b/ast2json.py
@@ -23,6 +23,7 @@
 
 regex = {
     'AlwaysInlineAttr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> always_inline",
+    'ArraySubscriptExpr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)' (?P<tags>.*)",
     'AsmLabelAttr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \"(?P<function>.+)\"",
     'AvailabilityAttr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<os>\w+) (?P<version>[\d.]+) (?P<unknown1>[\d.]+) (?P<unknown2>[\d.]+) (?P<unknown3>\".*?\"|\w+) (?P<unknown4>\".*?\"|\w+)",
     'BinaryOperator': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)' '(?P<operator>.*?)'",
@@ -38,15 +39,16 @@
     'DeprecatedAttr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \"(?P<message1>.*?)\" \"(?P<message2>.*?)\"",
     'ElaboratedType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\' (?P<tags>.+)',
     'FieldDecl': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>[^ ]+) (?P<type>.+)',
+    'FloatingLiteral': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*)' (?P<value>.+)",
     'FormatAttr': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>(?P<tags> Implicit)? (?P<function>\w+) (?P<unknown1>\d+) (?P<unknown2>\d+)',
     'ForStmt': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>",
     'FunctionDecl': r"^ (?P<address>[0-9a-fx]+) (?P<prev>prev [0-9a-fx]+)? ?<(?P<position1>.*)> (?P<position2>[^ ]+)(?P<tags1> implicit)?(?P<tags2> used)? (?P<name>\w+) '(?P<type>.*)'(?P<tags3> extern)?",
     'IfStmt': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>',
     'ImplicitCastExpr': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*)\' <(?P<kind>.*)>',
-    'IntegerLiteral': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*)\' (?P<value>\d+)',
-    'MemberExpr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)' lvalue ->(?P<name>\w+) (?P<address2>[0-9a-fx]+)",
+    'IntegerLiteral': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*)\' (?P<value>.+)',
+    'MemberExpr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)' (?P<tags>.*?)(?P<name>\w+) (?P<address2>[0-9a-fx]+)",
     'ParenExpr': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*?)\'',
-    'ParmVarDecl': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>.+) \'(?P<type>.*?)\'(?P<type2>:\'.*?\')?',
+    'ParmVarDecl': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>.+?)(?P<name> \w+)? '(?P<type>.*?)'(?P<type2>:'.*?')?",
     'PointerType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\'',
     'Record': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\'',
     'RecordDecl': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>[^ ]+) (?P<type>.+)',
@@ -59,6 +61,7 @@
     'TypedefType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\' (?P<tags>.+)',
     'UnaryOperator': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)'(?P<tags1> lvalue)?(?P<tags2> prefix)?(?P<tags3> postfix)? '(?P<operator>.*?)'",
     'VarDecl': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>[^ ]+) (?P<name>.+) '(?P<type>.+)'(?P<tags>.*)",
+    'WhileStmt': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>",
 }
 
 def build_tree(nodes, depth):
diff --git a/c2go.py b/c2go.py
index 7c148e494..5cb35f1ea 100644
--- a/c2go.py
+++ b/c2go.py
@@ -56,7 +56,7 @@ def resolve_type(s):
     if s == 'int' or s == '__darwin_ct_rune_t':
         return s
 
-    if s == 'long':
+    if s == 'long' or s == '__mbstate_t' or s == '__builtin_va_list':
         return 'int64'
 
     if s == 'long long':
@@ -86,6 +86,9 @@ def resolve_type(s):
     if s == 'long int':
         return 'int32'
 
+    if s == '__int128':
+        return 'int64'
+
     if re.match('unsigned char \\[\\d+\\]', s):
         return s[14:] + 'byte'
 
@@ -101,9 +104,9 @@ def resolve_type(s):
     if '(*)' in s or s == '__sFILEX *' or s == 'fpos_t':
         return "interface{}"
 
-    # return s
+    return s
 
-    raise Exception('Cannot resolve type "%s"' % s)
+    # raise Exception('Cannot resolve type "%s"' % s)
 
 def cast(expr, from_type, to_type):
     from_type = resolve_type(from_type)
@@ -128,15 +131,9 @@ def print_line(out, line, indent):
     out.write('%s%s\n' % ('\t' * indent, line))
 
 def render_expression(node):
-    if node['node'] == 'BINARY_OPERATOR':
-        end_of_left = list(node.get_children())[0].extent.end.column
-        operator = None
-        for t in node.get_tokens():
-            if t.extent.start.column >= end_of_left:
-                operator = t.spelling
-                break
-
-        left, right = [render_expression(t)[0] for t in list(node.get_children())]
+    if node['node'] == 'BinaryOperator':
+        operator = node['operator']
+        left, right = [render_expression(t)[0] for t in node['children']]
 
         return_type = 'bool'
         if operator == '|' or operator == '&':
@@ -151,19 +148,9 @@ def render_expression(node):
         except TypeError:
             return '// CONDITIONAL_OPERATOR: %s' % ''.join([t.spelling for t in node.get_tokens()]), 'unknown'
 
-    if node['node'] == 'UNARY_OPERATOR':
-        expr_start = list(node.get_children())[0].extent.start.column
-        operator = None
-        for t in node.get_tokens():
-            if t.extent.start.column >= expr_start:
-                break
-
-            operator = t.spelling
-
-        if operator is None:
-            operator = '++'
-
-        expr = render_expression(list(node.get_children())[0])
+    if node['node'] == 'UnaryOperator':
+        operator = node['operator']
+        expr = render_expression(node['children'][0])
 
         if operator == '!':
             return '%s(%s)' % ('__not_%s' % expr[1], expr[0]), expr[1]
@@ -193,20 +180,20 @@ def render_expression(node):
         e = render_expression(children[0])
         name = e[0]
 
-        if name == 'argc':
-            name = 'len(os.Args)'
-            add_import("os")
-        elif name == 'argv':
-            name = 'os.Args'
-            add_import("os")
+        # if name == 'argc':
+        #     name = 'len(os.Args)'
+        #     add_import("os")
+        # elif name == 'argv':
+        #     name = 'os.Args'
+        #     add_import("os")
 
         return name, e[1]
 
     if node['node'] in ('CHARACTER_LITERAL', 'StringLiteral', 'FLOATING_LITERAL'):
         return node['value'], 'const char*'
 
-    if node['node'] == 'INTEGER_LITERAL':
-        literal = list(node.get_tokens())[0].spelling
+    if node['node'] == 'IntegerLiteral':
+        literal = node['value']
         if literal[-1] == 'L':
             literal = '%s(%s)' % (resolve_type('long'), literal[:-1])
 
@@ -217,7 +204,16 @@ def render_expression(node):
         return '(%s)' % e[0], e[1]
 
     if node['node'] == 'DeclRefExpr':
-        return node['name'], node['type']
+        name = node['name']
+
+        if name == 'argc':
+            name = 'len(os.Args)'
+            add_import("os")
+        elif name == 'argv':
+            name = 'os.Args'
+            add_import("os")
+
+        return name, node['type']
 
     if node['node'] == 'ImplicitCastExpr':
         return render_expression(node['children'][0])
@@ -247,8 +243,8 @@ def render_expression(node):
 
         return '%s(%s)' % (func_name, ', '.join(args)), func_def[0]
 
-    if node['node'] == 'ARRAY_SUBSCRIPT_EXPR':
-        children = list(node.get_children())
+    if node['node'] == 'ArraySubscriptExpr':
+        children = node['children']
         return '%s[%s]' % (render_expression(children[0])[0],
             render_expression(children[1])[0]), 'unknown'
 
@@ -260,22 +256,22 @@ def render_expression(node):
         children = list(node.get_children())
         return render_expression(children[0]), 'unknown'
 
-    if node['node'] == 'FIELD_DECL' or node['node'] == 'VAR_DECL':
-        type = resolve_type(node.type.spelling)
-        name = node.spelling
+    if node['node'] == 'FIELD_DECL' or node['node'] == 'VarDecl':
+        type = resolve_type(node['type'])
+        name = node['name'].replace('used', '')
 
         prefix = ''
-        if node['node'] == 'VAR_DECL':
+        if node['node'] == 'VarDecl':
             prefix = 'var '
 
         suffix = ''
-        children = list(node.get_children())
+        # children = node['children']
 
         # We must check the position of the child is at the end. Otherwise a
         # child can refer to another expression like the size of the data type.
-        if len(children) > 0 and children[0].extent.end.column == node.extent.end.column:
-            e = render_expression(children[0])
-            suffix = ' = %s' % cast(e[0], e[1], type)
+        # if len(children) > 0 and children[0].extent.end.column == node.extent.end.column:
+        #     e = render_expression(children[0])
+        #     suffix = ' = %s' % cast(e[0], e[1], type)
 
         return '%s%s %s%s' % (prefix, name, type, suffix), 'unknown'
 
@@ -316,8 +312,8 @@ def render(out, node, indent=0, return_type=None):
                     has_body = True
 
         args = []
-        # for a in get_function_params(node):
-        #     args.append('%s %s' % (a['name'], resolve_type(a['type'])))
+        for a in get_function_params(node):
+            args.append('%s %s' % (a['name'], resolve_type(a['type'])))
 
         if has_body:
             return_type = ' ' + node['type']
@@ -377,17 +373,17 @@ def render(out, node, indent=0, return_type=None):
 
     #     return
 
-    # if node['node'] == 'FOR_STMT':
-    #     children = list(node.get_children())
+    if node['node'] == 'ForStmt':
+        children = node['children']
 
-    #     a, b, c = [render_expression(e)[0] for e in children[:3]]
-    #     print_line(out, 'for %s; %s; %s {' % (a, b, c), indent)
+        a, b, c = [render_expression(e)[0] for e in children[:3]]
+        print_line(out, 'for %s; %s; %s {' % (a, b, c), indent)
 
-    #     render(out, children[3], indent + 1, return_type)
+        render(out, children[3], indent + 1, return_type)
 
-    #     print_line(out, '}', indent)
+        print_line(out, '}', indent)
 
-    #     return
+        return
 
     # if node['node'] == 'BREAK_STMT':
     #     print_line(out, 'break', indent)
@@ -419,7 +415,13 @@ def render(out, node, indent=0, return_type=None):
         return
 
     if node['node'] == 'TypedefDecl':
-        print_line(out, "type %s %s\n" % (node['type'], node['name']), indent)
+        # FIXME: All of the logic here exists only to avoid errors; it needs
+        # to be fixed up.
+        if 'struct' in node['type'] or 'union' in node['type']:
+            return
+        node['type'] = node['type'].replace('unsigned', '')
+
+        print_line(out, "type %s %s\n" % (node['name'], resolve_type(node['type'])), indent)
         # print(node)
         return
 
@@ -472,10 +474,10 @@ def render(out, node, indent=0, return_type=None):
     #     print_line(out, '// ' + ' '.join(tokens[1:-2]), indent)
     #     return
 
-    # if node['node'] == 'DECL_STMT':
-    #     for child in node.get_children():
-    #         print_line(out, render_expression(child)[0], indent)
-    #     return
+    if node['node'] == 'DeclStmt':
+        for child in node['children']:
+            print_line(out, render_expression(child)[0], indent)
+        return
 
     if node['node'] == 'VarDecl':
     #     tokens = [t.spelling for t in node.get_tokens()]

From d0415975810f774beb4b14e4caefc02d717c485e Mon Sep 17 00:00:00 2001
From: Elliot Chance <elliotchance@gmail.com>
Date: Sat, 25 Mar 2017 13:51:45 +1100
Subject: [PATCH 5/5] All tests now parsing

---
 ast2json.py |  10 ++---
 c2go.py     | 111 +++++++++++++++++++++-------------------------------
 2 files changed, 50 insertions(+), 71 deletions(-)

diff --git a/ast2json.py b/ast2json.py
index b7e6f23c3..3f8827153 100644
--- a/ast2json.py
+++ b/ast2json.py
@@ -34,11 +34,11 @@
     'CompoundStmt': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>',
     'ConstantArrayType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\' (?P<size>\d+)',
     'CStyleCastExpr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)' <(?P<kind>.*)>",
-    'DeclRefExpr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)' (lvalue (?P<kind>\w+)|Function) (?P<address2>[0-9a-fx]+) '(?P<name>.*?)' '(?P<type2>.*?)'",
+    'DeclRefExpr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)'.*? (lvalue (?P<kind>\w+)|Function) (?P<address2>[0-9a-fx]+) '(?P<name>.*?)' '(?P<type2>.*?)'",
     'DeclStmt': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>",
     'DeprecatedAttr': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \"(?P<message1>.*?)\" \"(?P<message2>.*?)\"",
     'ElaboratedType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\' (?P<tags>.+)',
-    'FieldDecl': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>[^ ]+) (?P<type>.+)',
+    'FieldDecl': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>[^ ]+) (?P<tags>.*?)(?P<name>\w+?) '(?P<type>.+?)'",
     'FloatingLiteral': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*)' (?P<value>.+)",
     'FormatAttr': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>(?P<tags> Implicit)? (?P<function>\w+) (?P<unknown1>\d+) (?P<unknown2>\d+)',
     'ForStmt': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>",
@@ -51,7 +51,7 @@
     'ParmVarDecl': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>.+?)(?P<name> \w+)? '(?P<type>.*?)'(?P<type2>:'.*?')?",
     'PointerType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\'',
     'Record': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\'',
-    'RecordDecl': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>[^ ]+) (?P<type>.+)',
+    'RecordDecl': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>[^ ]+) (?P<kind>struct|union) (?P<name>\w+)",
     'RecordType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\'',
     'ReturnStmt': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>',
     'StringLiteral': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> \'(?P<type>.*)\'(?P<tags> lvalue)? (?P<value>.*)',
@@ -60,7 +60,7 @@
     'TypedefDecl': r'^ (?P<address>[0-9a-fx]+) <(?P<position>.+?)> (?P<position2><invalid sloc>|[^ ]+)(?P<tags>.*?) (?P<name>\w+) \'(?P<type>.*?)\'(?P<type2>:\'.*?\')?',
     'TypedefType': r'^ (?P<address>[0-9a-fx]+) \'(?P<type>.*)\' (?P<tags>.+)',
     'UnaryOperator': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> '(?P<type>.*?)'(?P<tags1> lvalue)?(?P<tags2> prefix)?(?P<tags3> postfix)? '(?P<operator>.*?)'",
-    'VarDecl': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>[^ ]+) (?P<name>.+) '(?P<type>.+)'(?P<tags>.*)",
+    'VarDecl': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)> (?P<position2>[^ ]+) (?P<name>.+) '(?P<type>.+?)'.*?(?P<tags>.*)",
     'WhileStmt': r"^ (?P<address>[0-9a-fx]+) <(?P<position>.*)>",
 }
 
@@ -112,7 +112,7 @@ def convert_lines_to_nodes(lines):
             sys.exit(1)
 
         node_type = indent_and_type.group(2)
-        # if node_type == 'DeclRefExpr':
+        # if node_type == 'FieldDecl':
         #     print(line[offset:])
 
         offset = len(indent_and_type.group(0))
diff --git a/c2go.py b/c2go.py
index 5cb35f1ea..0837953d7 100644
--- a/c2go.py
+++ b/c2go.py
@@ -41,7 +41,7 @@ def resolve_type(s):
     if s == 'float':
         return 'float32'
 
-    if s == 'void *':
+    if s == 'void *' or s == '__darwin_pthread_handler_rec *':
         return 'interface{}'
 
     if s == 'char':
@@ -104,6 +104,9 @@ def resolve_type(s):
     if '(*)' in s or s == '__sFILEX *' or s == 'fpos_t':
         return "interface{}"
 
+    if '(' in s:
+        return 'interface{}'
+
     return s
 
     # raise Exception('Cannot resolve type "%s"' % s)
@@ -169,27 +172,7 @@ def render_expression(node):
 
         return '%s%s' % (operator, expr[0]), expr[1]
 
-    if node['node'] == 'UNEXPOSED_EXPR':
-        children = list(node.get_children())
-        if len(children) < 1:
-            return '// UNEXPOSED_EXPR: %s' % ''.join([t.spelling for t in node.get_tokens()]), 'unknown'
-
-        # if len(children) > 1:
-        #     raise Exception('To many children!')
-
-        e = render_expression(children[0])
-        name = e[0]
-
-        # if name == 'argc':
-        #     name = 'len(os.Args)'
-        #     add_import("os")
-        # elif name == 'argv':
-        #     name = 'os.Args'
-        #     add_import("os")
-
-        return name, e[1]
-
-    if node['node'] in ('CHARACTER_LITERAL', 'StringLiteral', 'FLOATING_LITERAL'):
+    if node['node'] in ('CHARACTER_LITERAL', 'StringLiteral', 'FloatingLiteral'):
         return node['value'], 'const char*'
 
     if node['node'] == 'IntegerLiteral':
@@ -248,15 +231,15 @@ def render_expression(node):
         return '%s[%s]' % (render_expression(children[0])[0],
             render_expression(children[1])[0]), 'unknown'
 
-    if node['node'] == 'MEMBER_REF_EXPR':
-        children = list(node.get_children())
-        return '%s.%s' % (render_expression(children[0])[0], list(node.get_tokens())[-2].spelling), 'unknown'
+    if node['node'] == 'MemberExpr':
+        children = node['children']
+        return '%s.%s' % (render_expression(children[0])[0], node['name']), children[0]['type']
 
     if node['node'] == 'CSTYLE_CAST_EXPR':
         children = list(node.get_children())
         return render_expression(children[0]), 'unknown'
 
-    if node['node'] == 'FIELD_DECL' or node['node'] == 'VarDecl':
+    if node['node'] == 'FieldDecl' or node['node'] == 'VarDecl':
         type = resolve_type(node['type'])
         name = node['name'].replace('used', '')
 
@@ -265,13 +248,9 @@ def render_expression(node):
             prefix = 'var '
 
         suffix = ''
-        # children = node['children']
-
-        # We must check the position of the child is at the end. Otherwise a
-        # child can refer to another expression like the size of the data type.
-        # if len(children) > 0 and children[0].extent.end.column == node.extent.end.column:
-        #     e = render_expression(children[0])
-        #     suffix = ' = %s' % cast(e[0], e[1], type)
+        if 'children' in node:
+            children = node['children']
+            suffix = ' = %s' % render_expression(children[0])[0]
 
         return '%s%s %s%s' % (prefix, name, type, suffix), 'unknown'
 
@@ -317,7 +296,7 @@ def render(out, node, indent=0, return_type=None):
 
         if has_body:
             return_type = ' ' + node['type']
-            if return_type == ' void':
+            if return_type == ' void ()':
                 return_type = ''
 
             if function_name == 'main':
@@ -345,33 +324,33 @@ def render(out, node, indent=0, return_type=None):
             render(out, c, indent, return_type)
         return
 
-    # if node['node'] == 'IF_STMT':
-    #     children = list(node.get_children())
+    if node['node'] == 'IfStmt':
+        children = node['children']
 
-    #     e = render_expression(children[0])
-    #     print_line(out, 'if %s {' % cast(e[0], e[1], 'bool'), indent)
+        e = render_expression(children[0])
+        print_line(out, 'if %s {' % cast(e[0], e[1], 'bool'), indent)
 
-    #     render(out, children[1], indent + 1, return_type)
+        render(out, children[1], indent + 1, return_type)
 
-    #     if len(children) > 2:
-    #         print_line(out, '} else {', indent)
-    #         render(out, children[2], indent + 1, return_type)
+        if len(children) > 2:
+            print_line(out, '} else {', indent)
+            render(out, children[2], indent + 1, return_type)
 
-    #     print_line(out, '}', indent)
+        print_line(out, '}', indent)
 
-        # return
+        return
 
-    # if node['node'] == 'WHILE_STMT':
-    #     children = list(node.get_children())
+    if node['node'] == 'WhileStmt':
+        children = node['children']
 
-    #     e = render_expression(children[0])
-    #     print_line(out, 'for %s {' % cast(e[0], e[1], 'bool'), indent)
+        e = render_expression(children[0])
+        print_line(out, 'for %s {' % cast(e[0], e[1], 'bool'), indent)
 
-    #     render(out, children[1], indent + 1, return_type)
+        render(out, children[1], indent + 1, return_type)
 
-    #     print_line(out, '}', indent)
+        print_line(out, '}', indent)
 
-    #     return
+        return
 
     if node['node'] == 'ForStmt':
         children = node['children']
@@ -385,21 +364,13 @@ def render(out, node, indent=0, return_type=None):
 
         return
 
-    # if node['node'] == 'BREAK_STMT':
-    #     print_line(out, 'break', indent)
-    #     return
-
-    # if node['node'] == 'UNARY_OPERATOR':
-    #     variable, operator = [t.spelling for t in list(node.get_tokens())[0:2]]
-    #     if operator == '++':
-    #         print_line(out, '%s += 1' % variable, indent)
-    #         #print_line(out, '%s = string(%s[1:])' % (variable, variable), indent)
-    #         return
-
-    #     print_line(out, '%s%s' % (operator, variable), indent)
-    #     return
+    if node['node'] == 'BreakStmt':
+        print_line(out, 'break', indent)
+        return
 
-    #     #raise Exception('UNARY_OPERATOR: %s' % operator)
+    if node['node'] == 'UnaryOperator':
+        print_line(out, render_expression(node)[0], indent)
+        return
 
     if node['node'] == 'ReturnStmt':
         # try:
@@ -410,7 +381,7 @@ def render(out, node, indent=0, return_type=None):
         
         return
 
-    if node['node'] in ('BINARY_OPERATOR', 'INTEGER_LITERAL', 'CallExpr'):
+    if node['node'] in ('BinaryOperator', 'INTEGER_LITERAL', 'CallExpr'):
         print_line(out, render_expression(node)[0], indent)
         return
 
@@ -434,6 +405,14 @@ def render(out, node, indent=0, return_type=None):
         return
 
     if node['node'] == 'RecordDecl':
+        if node['kind'] == 'union':
+            return
+
+        print_line(out, "type %s %s {" % (node['name'], node['kind']), indent)
+        if 'children' in node:
+            for c in node['children']:
+                print_line(out, render_expression(c)[0], indent + 1)
+        print_line(out, "}\n", indent)
         return
 
     #if node['node'] == 'UNION_DECL' or node['node'] == 'STRUCT_DECL':