Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added serialization and token collection. #31

Open
wants to merge 34 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
5d1bad5
Massive refactoring of the giant 2 files into multiple smaller files
Craxic Mar 8, 2015
2fc9b87
More work on making the tokens available to the user
Craxic Mar 10, 2015
1ba1bfc
More work fixing type assertions
Mar 11, 2015
14aa960
More work on altering models
Craxic Mar 12, 2015
cad0e3b
Yet more work...
Craxic Mar 12, 2015
f6cc46e
Surprise! More work.
Craxic Mar 13, 2015
ce065db
More work. Making OpenJDK test
Craxic Mar 15, 2015
71f9284
Fixed all parsing bugs (hopefully) and finished OpenJDK7 test
Craxic Mar 15, 2015
feb0732
Added a few serializers
Craxic Mar 15, 2015
8571965
More work...
Craxic Mar 16, 2015
e71d7a2
Added more serializers
Craxic Mar 16, 2015
48a837c
Added more serialization
Craxic Mar 17, 2015
a719ded
Added more serializers
Craxic Mar 17, 2015
8ad9490
Added yet more serializers
Craxic Mar 17, 2015
264476d
Tiny bit more work here and there.
Craxic Mar 18, 2015
832cadb
Bit more fixes for serialization
Mar 18, 2015
e12ca52
A little bit more tidying up of serializers.
Craxic Mar 18, 2015
0536315
Serializers all done now, works for all of OpenJDK7. Now need to veri…
Craxic Mar 18, 2015
a8b33e8
More bug fixes to serializers
Craxic Mar 19, 2015
a373902
Fixed a few issues with the serializers
Craxic Mar 19, 2015
fa7bbe8
Started doing a bit of cleanup. Serializers still not 100% though.
Craxic Mar 20, 2015
65473f3
Massive changes, many of them untested. Started writing another examp…
Craxic Mar 22, 2015
2f86829
Tested.
Craxic Mar 22, 2015
e38ba11
To Python 3
Craxic Mar 22, 2015
edecf7b
Fixed file closing issue.
Craxic Mar 22, 2015
1624379
Fixed encoding issue
Craxic Mar 22, 2015
7851a2b
Fixed Python2 compatibility. Prevented running OpenJDK test in travis…
Craxic Mar 23, 2015
fbf979e
Minor updates to the function_cacher.py example
Craxic Mar 23, 2015
6bea580
Updated setup.py
Craxic Mar 23, 2015
7f44103
Minor changes to function_cacher and source_element.py
Craxic Mar 23, 2015
49828c4
Fixed mistake I made with equality function, fixed error in function_…
Craxic Mar 23, 2015
c14cb92
Altered the way static functions are cached
Craxic Mar 23, 2015
3241e41
Decided that this whole function cacher shouldn't be an example and d…
Craxic Mar 23, 2015
317c779
Forced usage of old dependency
Craxic Jul 21, 2016
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ parser.out
cover/
dist/
build/

openjdk7/
2 changes: 1 addition & 1 deletion COPYING
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright (c) 2012, Werner Hahn
Copyright (c) 2015, Werner Hahn, Matthew Ready and contributors
All rights reserved.

Redistribution and use in source and binary forms, with or without
Expand Down
8 changes: 3 additions & 5 deletions example/parse_expr.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
#!/usr/bin/env python2

import sys
from plyj.parser import Parser

if len(sys.argv) == 1:
print('''usage: parse_expr.py <expression> ...
Example: parse_expr.py '1+2' '3' 'j = (int) i + 3' ''')
sys.exit(1)

import plyj.parser as plyj

parser = plyj.Parser()
parser = Parser()
for expr in sys.argv[1:]:
print(parser.parse_expression(expr))

print(parser.parse_expression(expr))
59 changes: 26 additions & 33 deletions example/symbols.py
Original file line number Diff line number Diff line change
@@ -1,49 +1,42 @@
#!/usr/bin/env python2

import sys
import plyj.parser
import plyj.model as m
from plyj.model.classes import FieldDeclaration
from plyj.model.method import MethodDeclaration
from plyj.model.statement import VariableDeclaration
from plyj.parser import Parser

p = plyj.parser.Parser()
p = Parser()
tree = p.parse_file(sys.argv[1])

print('declared types:')
for type_decl in tree.type_declarations:
print(type_decl.name)
print(type_decl.name.serialize())
if type_decl.extends is not None:
print(' -> extending ' + type_decl.extends.name.value)
if len(type_decl.implements) is not 0:
print(' -> implementing ' + ', '.join([type.name.value for type in type_decl.implements]))
print
print(' -> extending ' + type_decl.extends.name.serialize())
if len(type_decl.implements) != 0:
implements = (type_.serialize() for type_ in type_decl.implements)
print(' -> implementing ' + ', '.join(implements))

print('fields:')
for field_decl in [decl for decl in type_decl.body if type(decl) is m.FieldDeclaration]:
for var_decl in field_decl.variable_declarators:
if type(field_decl.type) is str:
type_name = field_decl.type
else:
type_name = field_decl.type.name.value
print(' ' + type_name + ' ' + var_decl.variable.name)
for field_decl in type_decl.body:
if not isinstance(field_decl, FieldDeclaration):
continue
print(' ' + field_decl.serialize())

print
print('methods:')
for method_decl in [decl for decl in type_decl.body if type(decl) is m.MethodDeclaration]:
param_strings = []
for param in method_decl.parameters:
if type(param.type) is str:
param_strings.append(param.type + ' ' + param.variable.name)
else:
param_strings.append(param.type.name.value + ' ' + param.variable.name)
print(' ' + method_decl.name + '(' + ', '.join(param_strings) + ')')
for method_decl in type_decl.body:
if not isinstance(method_decl, MethodDeclaration):
continue

param_strings = (param.serialize() for param in method_decl.parameters)
name = method_decl.name.serialize()
print(' ' + name + '(' + ', '.join(param_strings) + ')')

if method_decl.body is not None:
for statement in method_decl.body:
# note that this misses variables in inner blocks such as for loops
# see symbols_visitor.py for a better way of handling this
if type(statement) is m.VariableDeclaration:
for var_decl in statement.variable_declarators:
if type(statement.type) is str:
type_name = statement.type
else:
type_name = statement.type.name.value
print(' ' + type_name + ' ' + var_decl.variable.name)
# Note that this misses variables in inner blocks such as for
# loops; see symbols_visitor.py for a better way of handling
# this.
if type(statement) is VariableDeclaration:
print(' ' + statement.serialize())
30 changes: 15 additions & 15 deletions example/symbols_visitor.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#!/usr/bin/env python2

import sys
import plyj.parser
import plyj.model as m
from plyj.parser import Parser
import plyj.visitor as m

p = plyj.parser.Parser()
p = Parser()
tree = p.parse_file(sys.argv[1])

class MyVisitor(m.Visitor):

class MyVisitor(m.Visitor):
def __init__(self):
super(MyVisitor, self).__init__()

Expand All @@ -27,7 +27,7 @@ def visit_type_declaration(self, type_decl):
print(' -> extending ' + type_decl.extends.name.value)
if len(type_decl.implements) is not 0:
print(' -> implementing ' + ', '.join([type.name.value for type in type_decl.implements]))
print
print()

return True

Expand All @@ -37,34 +37,34 @@ def visit_FieldDeclaration(self, field_decl):
self.first_field = False
for var_decl in field_decl.variable_declarators:
if type(field_decl.type) is str:
type_name = field_decl.type
type_name = field_decl.type.serialize()
else:
type_name = field_decl.type.name.value
print(' ' + type_name + ' ' + var_decl.variable.name)
type_name = field_decl.type.name.serialize()
print(' ' + type_name + ' ' + var_decl.variable.name.serialize())

def visit_MethodDeclaration(self, method_decl):
if self.first_method:
print
print()
print('methods:')
self.first_method = False

param_strings = []
for param in method_decl.parameters:
if type(param.type) is str:
param_strings.append(param.type + ' ' + param.variable.name)
param_strings.append(param.type.serialize() + ' ' + param.variable.serialize())
else:
param_strings.append(param.type.name.value + ' ' + param.variable.name)
print(' ' + method_decl.name + '(' + ', '.join(param_strings) + ')')
param_strings.append(param.type.name.serialize() + ' ' + param.variable.name.serialize())
print(' ' + method_decl.name.serialize() + '(' + ', '.join(param_strings) + ')')

return True

def visit_VariableDeclaration(self, var_declaration):
for var_decl in var_declaration.variable_declarators:
if type(var_declaration.type) is str:
type_name = var_declaration.type
type_name = var_declaration.type.serialize()
else:
type_name = var_declaration.type.name.value
print(' ' + type_name + ' ' + var_decl.variable.name)
type_name = var_declaration.type.name.serialize()
print(' ' + type_name + ' ' + var_decl.variable.name.serialize())

print('declared types:')
tree.accept(MyVisitor())
31 changes: 31 additions & 0 deletions example/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env python2
import os
from plyj.parser import Parser

JAVA_FILES_FOLDER = "./"


def find_java_files(folder=None):
    """Return the paths of all ``.java`` files found below *folder*.

    The directory tree is walked recursively with ``os.walk``. Each returned
    path is the walked directory joined with the file name, so it is relative
    or absolute exactly as *folder* is.

    :param folder: directory to search; defaults to ``JAVA_FILES_FOLDER``
        when omitted or ``None``.
    :return: list of path strings, in ``os.walk`` order.
    """
    if folder is None:
        folder = JAVA_FILES_FOLDER

    result = []
    # Thanks ghostdog74 https://stackoverflow.com/questions/3964681
    for root, _dirs, files in os.walk(folder):
        for file_ in files:
            if file_.endswith(".java"):
                # Join with the loop variable; the previous code referenced an
                # unrelated name ``f`` and raised NameError on the first match.
                result.append(os.path.join(root, file_))
    return result

def _check_round_trip(parser, java_path):
    """Parse *java_path*, serialize it, re-parse the output and compare.

    The serialized source is written to ``test.java`` in the current
    directory and parsed again; the two parse results must compare equal.
    """
    print("Parsing " + java_path)
    first_tree = parser.parse_file(java_path)

    print("Serializing " + java_path)
    source_text = first_tree.serialize()
    with open("test.java", "w") as out_file:
        out_file.write(source_text)

    print("Parsing (2) " + java_path)
    second_tree = parser.parse_file("test.java")

    print("Asserting " + java_path)
    assert first_tree == second_tree


# One parser instance is reused for every file that is found.
p = Parser()
for f in find_java_files():
    _check_round_trip(p, f)
107 changes: 107 additions & 0 deletions plyj/java_lexer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#!/usr/bin/env python2


class JavaLexer(object):
    """
    There should be no reason to use this class directly. Please use Parser in
    parser.py

    This class implements the Java lexer for Lex.

    Every ``t_*`` attribute is a token rule. String attributes hold the
    token's regular expression directly; for function rules the regular
    expression is the function's docstring, so those docstrings are part of
    the lexer's behaviour and must not be reworded.
    """

    # Reserved words. ``t_NAME`` turns a matching identifier into the token
    # type named after the upper-cased keyword.
    keywords = ('this', 'class', 'void', 'super', 'extends', 'implements',
                'enum', 'interface', 'byte', 'short', 'int', 'long', 'char',
                'float', 'double', 'boolean', 'null', 'true', 'false', 'final',
                'public', 'protected', 'private', 'abstract', 'static',
                'strictfp', 'transient', 'volatile', 'synchronized', 'native',
                'throws', 'default', 'instanceof', 'if', 'else', 'while',
                'for', 'switch', 'case', 'assert', 'do', 'break', 'continue',
                'return', 'throw', 'try', 'catch', 'finally', 'new', 'package',
                'import')

    # All token type names, including one per keyword.
    tokens = ['NAME', 'NUM',

              'CHAR_LITERAL', 'STRING_LITERAL',

              'LINE_COMMENT', 'BLOCK_COMMENT',

              'OR', 'AND',
              'EQ', 'NEQ', 'GTEQ', 'LTEQ',
              'LSHIFT', 'RSHIFT', 'RRSHIFT',

              'TIMES_ASSIGN', 'DIVIDE_ASSIGN', 'REMAINDER_ASSIGN',
              'PLUS_ASSIGN', 'MINUS_ASSIGN', 'LSHIFT_ASSIGN', 'RSHIFT_ASSIGN',
              'RRSHIFT_ASSIGN', 'AND_ASSIGN', 'OR_ASSIGN', 'XOR_ASSIGN',

              'PLUSPLUS', 'MINUSMINUS',

              'ELLIPSIS'] + [k.upper() for k in keywords]

    # Single characters that are returned as tokens of their own.
    literals = '()+-*/=?:,.^|&~!=[]{};<>@%'

    t_NUM = r'\.?[0-9][0-9eE_lLdDa-fA-F.xXpP]*'
    t_CHAR_LITERAL = r'\'([^\\\n]|(\\.))*?\''
    t_STRING_LITERAL = r'\"([^\\\n]|(\\.))*?\"'

    # The ``t_ignore_`` prefix makes the lexer drop everything this matches.
    t_ignore_LINE_COMMENT = '//.*'

    @staticmethod
    def t_BLOCK_COMMENT(t):
        r"""/\*(.|\n)*?\*/"""
        # Keep the line counter right across multi-line comments. Nothing is
        # returned, so no BLOCK_COMMENT token reaches the parser.
        t.lexer.lineno += t.value.count('\n')

    # NOTE(review): purpose unclear; the name does not start with ``t_`` so it
    # is not a token rule. Kept so the class attributes stay unchanged.
    t = 0

    t_OR = r'\|\|'
    t_AND = '&&'

    t_EQ = '=='
    t_NEQ = '!='
    t_GTEQ = '>='
    t_LTEQ = '<='

    t_LSHIFT = '<<'
    t_RSHIFT = '>>'
    t_RRSHIFT = '>>>'

    t_TIMES_ASSIGN = r'\*='
    t_DIVIDE_ASSIGN = '/='
    t_REMAINDER_ASSIGN = '%='
    t_PLUS_ASSIGN = r'\+='
    t_MINUS_ASSIGN = '-='
    t_LSHIFT_ASSIGN = '<<='
    t_RSHIFT_ASSIGN = '>>='
    t_RRSHIFT_ASSIGN = '>>>='
    t_AND_ASSIGN = '&='
    t_OR_ASSIGN = r'\|='
    # Raw string: same pattern as before, without the invalid ``\^`` escape
    # that newer Python versions warn about in a plain string literal.
    t_XOR_ASSIGN = r'\^='

    t_PLUSPLUS = r'\+\+'
    t_MINUSMINUS = r'\-\-'

    t_ELLIPSIS = r'\.\.\.'

    # Characters skipped between tokens (space, tab, form feed).
    t_ignore = ' \t\f'

    @staticmethod
    def t_NAME(t):
        """[A-Za-z_$][A-Za-z0-9_$]*"""
        # Identifiers that are reserved words get the keyword's token type.
        if t.value in JavaLexer.keywords:
            t.type = t.value.upper()
        return t

    @staticmethod
    def t_newline(t):
        r"""\n+"""
        # One line per matched "\n"; the token itself is discarded.
        t.lexer.lineno += len(t.value)

    @staticmethod
    def t_newline_win(t):
        r"""(\r\n)+"""
        # Each line ending is two characters. Floor division keeps ``lineno``
        # an int on Python 3, where ``/`` would turn it into a float.
        t.lexer.lineno += len(t.value) // 2

    @staticmethod
    def t_error(t):
        # Report the offending character, then skip it and carry on lexing.
        print("Illegal character '{}' ({}) in line {}"
              .format(t.value[0], hex(ord(t.value[0])), t.lexer.lineno))
        t.lexer.skip(1)
44 changes: 44 additions & 0 deletions plyj/java_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env python2
from plyj.java_lexer import JavaLexer

from plyj.parsers.class_parser import ClassParser
from plyj.parsers.compilation_unit_parser import CompilationUnitParser
from plyj.parsers.expression_parser import ExpressionParser
from plyj.parsers.literal_parser import LiteralParser
from plyj.parsers.name_parser import NameParser
from plyj.parsers.statement_parser import StatementParser
from plyj.parsers.type_parser import TypeParser


class JavaParser(ExpressionParser, NameParser, LiteralParser, TypeParser,
                 ClassParser, StatementParser, CompilationUnitParser):
    """
    YACC grammar for Java, assembled from the individual ``*Parser`` mixins.

    Not meant to be used directly; go through Parser in parser.py instead.

    The docstring of each ``p_*`` function is its grammar production and is
    therefore part of the parser's behaviour.
    """
    tokens = JavaLexer.tokens

    # The three ``goal`` alternatives differ only in their leading marker
    # token (PLUSPLUS, MINUSMINUS or '*'); the result is always the value of
    # the symbol that follows the marker.
    # NOTE(review): presumably the caller prepends the marker to choose what
    # kind of input is parsed - confirm against parser.py.

    @staticmethod
    def p_goal_compilation_unit(p):
        """goal : PLUSPLUS compilation_unit"""
        p[0] = p[2]

    @staticmethod
    def p_goal_expression(p):
        """goal : MINUSMINUS expression"""
        p[0] = p[2]

    @staticmethod
    def p_goal_statement(p):
        """goal : '*' block_statement"""
        p[0] = p[2]

    @staticmethod
    def p_error(p):
        # Syntax errors are only reported on stdout; nothing is raised here.
        message = 'error: {}'.format(p)
        print(message)

    @staticmethod
    def p_empty(p):
        """empty :"""
Loading