Skip to content

Commit

Permalink
Programming/AP-TED: Add AP-TED Algorithm and Code to AST Converter (#275
Browse files Browse the repository at this point in the history
)
  • Loading branch information
marlon-luca-bu authored Jun 11, 2024
1 parent 8a75867 commit 2a089a5
Show file tree
Hide file tree
Showing 34 changed files with 26,128 additions and 17 deletions.
1 change: 1 addition & 0 deletions .prospector.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ ignore-paths:
- module_text_cofee/module_text_cofee/protobuf
- module_programming_themisml/module_programming_themisml/extract_methods/languages
- module_programming_themisml/module_programming_themisml/extract_methods/method_parser_listener.py
- module_programming_ast/module_programming_ast/convert_code_to_ast/languages

mypy:
run: true
Expand Down
2 changes: 1 addition & 1 deletion athena/athena/programming.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@

__all__ = [
"Exercise", "Submission", "Feedback",
"get_stored_exercises", "get_stored_submissions", "get_stored_feedback", "get_stored_feedback_suggestions"
"get_stored_exercises", "get_stored_submissions", "get_stored_feedback", "get_stored_feedback_suggestions", "count_stored_submissions"
]
79 changes: 64 additions & 15 deletions module_programming_ast/module_programming_ast/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@
import random
from typing import List, Any, cast
from pydantic import BaseModel, Field

from athena import app, config_schema_provider, submissions_consumer, submission_selector, feedback_consumer, feedback_provider, evaluation_provider, emit_meta
from module_programming_ast.convert_code_to_ast.get_feedback_methods import get_feedback_method
from module_programming_ast.feedback_suggestions.feedback_suggestions import create_feedback_suggestions
from athena import (app, config_schema_provider, submissions_consumer, submission_selector, feedback_consumer,
feedback_provider, evaluation_provider, emit_meta)
from athena.logger import logger
from athena.storage import store_exercise, store_submissions, store_feedback
from athena.programming import Exercise, Submission, Feedback, get_stored_feedback_suggestions, count_stored_submissions
from athena.storage import store_exercise, store_submissions, store_feedback, store_feedback_suggestions
from athena.programming import (Exercise, Submission, Feedback, get_stored_feedback_suggestions,
count_stored_submissions, get_stored_submissions)
from module_programming_ast.remove_overlapping import filter_overlapping_suggestions
from module_programming_ast.remove_suspicious import filter_suspicious

Expand Down Expand Up @@ -62,18 +65,65 @@ def select_submission(exercise: Exercise, submissions: List[Submission]) -> Subm

@feedback_consumer
def process_incoming_feedback(exercise: Exercise, submission: Submission, feedbacks: List[Feedback]):
    """Consume tutor feedback and derive feedback suggestions from it.

    Steps:
      1. Bail out unless the exercise language is Java or Python.
      2. Keep only feedback that references a file and a start line.
      3. Attach metadata about the enclosing method (name, code, line range, AST)
         to each feedback; feedback that is not inside a method is dropped.
      4. Create feedback suggestions from the remaining feedback and all stored
         submissions of the exercise.
      5. Record on each feedback (and on the suggestions derived from it) how many
         suggestions it produced, then store suggestions and feedback.

    Args:
        exercise: The exercise the feedback belongs to.
        submission: The submission the feedback was given on.
        feedbacks: The feedback items given on the submission.
    """
    logger.info("process_feedback: Received %d feedbacks for submission %d of exercise %d", len(feedbacks),
                submission.id, exercise.id)
    logger.info("process_feedback: Feedbacks: %s", feedbacks)

    programming_language = exercise.programming_language.lower()
    # Currently only works with Java and Python - can be extended with more languages if the grammar is available
    if programming_language not in ["java", "python"]:
        logger.info("AP-TED currently only works with Java and Python. Not consuming feedback.")
        return

    # Remove unreferenced feedbacks
    feedbacks = [f for f in feedbacks if f.file_path is not None and f.line_start is not None]

    # Add method metadata to feedbacks
    feedbacks_with_method = []
    for feedback in feedbacks:
        feedback_method = get_feedback_method(submission, feedback, programming_language)
        if feedback_method is None:
            # don't consider feedback without a method
            continue
        logger.debug("Feedback #%d: Found method %s", feedback.id, feedback_method.name)
        feedback.meta["method_name"] = feedback_method.name
        feedback.meta["method_code"] = feedback_method.source_code
        feedback.meta["method_line_start"] = feedback_method.line_start
        feedback.meta["method_line_end"] = feedback_method.line_end
        feedback.meta["method_ast"] = feedback_method.ast
        feedbacks_with_method.append(feedback)
    feedbacks = feedbacks_with_method

    # find all submissions for this exercise
    exercise_submissions = cast(List[Submission], list(get_stored_submissions(exercise.id)))

    # create feedback suggestions
    logger.info("Creating feedback suggestions for %d feedbacks", len(feedbacks))
    feedback_suggestions = create_feedback_suggestions(exercise_submissions, feedbacks, programming_language)

    # additionally, store metadata about how impactful each feedback was, i.e. how many suggestions were given based on it
    # Tally the suggestions per originating feedback in one pass instead of
    # rescanning the whole suggestion list for every feedback.
    suggestion_counts: dict = {}
    for suggestion in feedback_suggestions:
        origin_id = suggestion.meta["original_feedback_id"]
        suggestion_counts[origin_id] = suggestion_counts.get(origin_id, 0) + 1

    for feedback in feedbacks:
        feedback.meta["n_feedback_suggestions"] = suggestion_counts.get(feedback.id, 0)

    # store the information on the suggestions as well for quicker access later
    # (only for suggestions whose originating feedback is part of this batch)
    known_feedback_ids = {feedback.id for feedback in feedbacks}
    for suggestion in feedback_suggestions:
        origin_id = suggestion.meta["original_feedback_id"]
        if origin_id in known_feedback_ids:
            suggestion.meta["n_feedback_suggestions"] = suggestion_counts[origin_id]

    # save to database
    # type: ignore
    store_feedback_suggestions(feedback_suggestions)
    for feedback in feedbacks:
        store_feedback(feedback)

    logger.debug("Feedbacks processed")


@feedback_provider
def suggest_feedback(exercise: Exercise, submission: Submission, module_config: Configuration) -> List[Feedback]:
logger.info("suggest_feedback: Suggestions for submission %d of exercise %d were requested", submission.id, exercise.id)
def suggest_feedback(exercise: Exercise, submission: Submission) -> List[Feedback]:
logger.info("suggest_feedback: Suggestions for submission %d of exercise %d were requested", submission.id,
exercise.id)
# Do something with the submission and return a list of feedback
# AP-TED currently only works with Java and Python
if exercise.programming_language.lower() != "java" or exercise.programming_language.lower() != "python":
Expand All @@ -92,20 +142,19 @@ def suggest_feedback(exercise: Exercise, submission: Submission, module_config:

return suggested_feedbacks



# Only if it makes sense for a module (Optional)
@evaluation_provider
def evaluate_feedback(exercise: Exercise, submission: Submission, true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback]) -> Any:
def evaluate_feedback(exercise: Exercise, submission: Submission, true_feedbacks: List[Feedback],
predicted_feedbacks: List[Feedback]) -> Any:
logger.info(
"evaluate_feedback: Evaluation for submission %d of exercise %d was requested with %d true and %d predicted feedbacks",
"evaluate_feedback: Evaluation for submission %d of exercise %d was requested with %d true and %d predicted feedbacks",
submission.id, exercise.id, len(true_feedbacks), len(predicted_feedbacks)
)

# Do something with the true and predicted feedback and return the evaluation result
# Generate some example evaluation result
evaluation_results = []
true_feedback_embeddings = [random.random() for _ in true_feedbacks]
true_feedback_embeddings = [random.random() for _ in true_feedbacks]
predicted_feedback_embeddings = [random.random() for _ in predicted_feedbacks]
for feedback, embedding in zip(predicted_feedbacks, predicted_feedback_embeddings):
feedback_evaluation = {
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from antlr4 import CommonTokenStream, InputStream
from antlr4.tree.Tree import ParseTreeWalker
from module_programming_ast.convert_code_to_ast.languages.python.Python3Lexer import Python3Lexer
from module_programming_ast.convert_code_to_ast.languages.python.Python3Parser import Python3Parser
from module_programming_ast.convert_code_to_ast.languages.java.JavaLexer import JavaLexer
from module_programming_ast.convert_code_to_ast.languages.java.JavaParser import JavaParser
from module_programming_ast.convert_code_to_ast.languages.python.Python3MethodParserListener import \
MethodParserListener as PythonMethodParserListener
from module_programming_ast.convert_code_to_ast.languages.java.JavaMethodParserListener import \
MethodParserListener as JavaMethodParserListener

# TODO: Do I need the to_ast method?

# Grammars for programming languages have different parse rules.
# Each constant is the name of the parser method that parse_file looks up
# with getattr and calls to obtain the parse tree for a whole source file.
JAVA_PARSE_RULE = "compilationUnit"
PYTHON_PARSE_RULE = "file_input"


# class ASTNode:
# def __init__(self, name):
# self.name = name
# self.children = []
#
# def add_child(self, child):
# self.children.append(child)
#
# def __repr__(self):
# return f"{self.name}{self.children}"
#
#
# def to_ast(node):
# if isinstance(node, TerminalNodeImpl):
# return ASTNode(node.getText())
# ast_node = ASTNode(type(node).__name__.replace('Context', ''))
# for i in range(node.getChildCount()):
# ast_node.add_child(to_ast(node.getChild(i)))
# return ast_node


def parse_java_file(source_code: str):
    """Parse Java source code and return the methods collected by the Java listener."""
    return parse_file(
        source_code,
        lexer_class=JavaLexer,
        parser_class=JavaParser,
        parse_rule=JAVA_PARSE_RULE,
        listener_class=JavaMethodParserListener,
    )


def parse_python_file(source_code: str):
    """Parse Python source code and return the methods collected by the Python listener."""
    return parse_file(
        source_code,
        lexer_class=Python3Lexer,
        parser_class=Python3Parser,
        parse_rule=PYTHON_PARSE_RULE,
        listener_class=PythonMethodParserListener,
    )


def parse_file(source_code, lexer_class, parser_class, parse_rule, listener_class):
    """Parse source code with the given grammar classes and collect its methods.

    Args:
        source_code: Text of the source file to parse.
        lexer_class: Lexer class; instantiated with the input stream.
        parser_class: Parser class; instantiated with the token stream.
        parse_rule: Name of the parser method to call as the entry rule.
        listener_class: Listener class; instantiated with the parser and walked
            over the parse tree. It must expose a ``methods`` attribute.

    Returns:
        A copy of ``listener.methods`` as filled in during the tree walk.
    """
    input_stream = InputStream(source_code)
    lexer = lexer_class(input_stream)
    stream = CommonTokenStream(lexer)
    parser = parser_class(stream)
    # The entry rule differs per grammar, so it is looked up by name.
    tree = getattr(parser, parse_rule)()

    listener = listener_class(parser)
    walker = ParseTreeWalker()
    walker.walk(listener, tree)

    # Return a copy so callers do not share the listener's own collection.
    return listener.methods.copy()


def parse(source_code: str, programming_language: str):
    """Parse source code with the parser that matches the programming language.

    Args:
        source_code: Text of the source file to parse.
        programming_language: Either "java" or "python" (exact match).

    Returns:
        Whatever the language-specific parse function returns.

    Raises:
        ValueError: If the programming language is neither "java" nor "python".
    """
    # Guard clause first: reject anything we have no grammar for.
    if programming_language not in ("java", "python"):
        raise ValueError(f"Unsupported programming language: {programming_language}")
    if programming_language == "java":
        return parse_java_file(source_code)
    return parse_python_file(source_code)


if __name__ == "__main__":
    # Manual smoke test: parse a small Python sample when this module is run as a script.
    # file_path2 = "../test_files/test_java_1.java"
    # parse_java_file(file_path2)

    # The sample must be valid, properly indented Python, otherwise the parser
    # is fed a broken program.
    code = """def process_numbers(numbers):
    total = 0
    for number in numbers:
        if number % 2 == 1:
            total += number
        else:
            total -= number
    if total > 0:
        print("Positive total:", total)
    else:
        print("Non-positive total:", total)"""
    # The sample is parsed twice and the results are not used further here;
    # presumably this is to compare repeated parses by hand - TODO confirm.
    code1 = parse_python_file(code)
    code2 = parse_python_file(code)
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from typing import Optional

from athena.programming import Submission, Feedback
from module_programming_ast.convert_code_to_ast.extract_method_and_ast import parse
from module_programming_ast.convert_code_to_ast.method_node import MethodNode
from athena.logger import logger


def get_feedback_method(submission: Submission, feedback: Feedback, programming_language: str) -> Optional[MethodNode]:
    """Return the first parsed method that contains every line the feedback refers to.

    Args:
        submission: Submission whose code is read via ``get_code``.
        feedback: Feedback with ``file_path``, ``line_start`` and optional ``line_end``.
        programming_language: Language name passed on to ``parse``.

    Returns:
        The first method whose line range encloses the feedback lines, or None if
        the feedback has no file or start line, the file cannot be decoded, or no
        method encloses the feedback.
    """
    first_line = feedback.line_start
    if feedback.file_path is None or first_line is None:
        return None

    try:
        code = submission.get_code(feedback.file_path)
    except UnicodeDecodeError:
        logger.warning("File %s in submission %d is not UTF-8 encoded.", feedback.file_path, submission.id)
        return None

    # A feedback without an explicit end line covers only its start line.
    last_line = first_line if feedback.line_end is None else feedback.line_end

    for method in parse(code, programming_language):
        if method.line_start is None or method.line_end is None:
            continue
        # method has to contain all feedback lines
        if method.line_start <= first_line and method.line_end >= last_line:
            return method
    return None
Empty file.
Loading

0 comments on commit 2a089a5

Please sign in to comment.