Bac 2 review route #247

Open · wants to merge 2 commits into dev
Binary file added Backend/.DS_Store
Binary file not shown.
1 change: 1 addition & 0 deletions Backend/.gitignore
@@ -2,6 +2,7 @@ __pycache__
venv/
lenv/
env/
jenv/
pyvenv.cfg
env

21 changes: 19 additions & 2 deletions Backend/Config/db.py
@@ -1,9 +1,26 @@
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import sessionmaker, Session

"""Database connection to xamp server """
DATABASE_URL = "mysql+mysqlconnector://reconcileai:HNG#9reconcileai@localhost:3306/Reconcile"
engine = create_engine(DATABASE_URL)
SessionLocal = sessionmaker(autocommit= False, autoflush=False, bind=engine)
Base = declarative_base()
Base = declarative_base()

def get_db():
    """Yield a request-scoped session, committing on success and rolling back on error."""
    db: Session = SessionLocal()
    try:
        yield db
        db.commit()
    except Exception:
        db.rollback()
    finally:
        db.close()



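For context on how the new get_db generator is meant to be consumed: as a generator dependency, the code before the yield runs at request start and the commit/rollback/close logic runs after the response. A minimal sketch of a route using it, assuming a FastAPI app; the route path, health-check query, and import path are illustrative assumptions, not code from this PR.

from fastapi import Depends, FastAPI
from sqlalchemy import text
from sqlalchemy.orm import Session

from Config.db import get_db  # assumed import path for the module changed above

app = FastAPI()


@app.get("/health/db")
def db_health(db: Session = Depends(get_db)):
    # The injected session is committed, rolled back on error, and closed
    # by get_db once this handler returns.
    db.execute(text("SELECT 1"))
    return {"database": "ok"}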
110 changes: 55 additions & 55 deletions Backend/controllers/file_storage.py
@@ -1,55 +1,55 @@
#!/usr/bin/env python3
"""Module to define a serialization/deserialization engine
that saves objects to a file"""
import json
from os import path as os_path


class FileStorage:
    """class that defines a file storage engine
    for JSON serialization"""
    __file_path = "backend/reviews.json"
    __objects = {}

    def all(self) -> dict:
        """returns __objects"""
        return FileStorage.__objects

    def new(self, obj) -> None:
        """sets a new obj in __objects"""
        FileStorage.__objects[
            f"{obj.__class__.__name__}.{obj.id}"] = obj

    def save(self):
        """serializes __objects to a JSON file in __file_path"""
        with open(FileStorage.__file_path, 'w') as f:
            temp = {}
            temp.update(FileStorage.__objects)
            for key, val in temp.items():
                temp[key] = val.dict()
            json.dump(temp, f)

    def reload(self):
        """deserializes a json file at __file_path and save
        it in __objects"""
        models = import_models()
        if os_path.exists(FileStorage.__file_path):
            with open(f"{FileStorage.__file_path}", "r") as f:
                json_str = f.read()
                if len(json_str) == 0:
                    return
                loaded_dict = json.loads(json_str)
                FileStorage.__objects.clear()
                for key, obj_dict in loaded_dict.items():
                    obj_class = models[key.split(".")[0]]
                    FileStorage.__objects[key] = obj_class(**obj_dict)


def import_models():
    """imports the modules locally when called
    to avoid circular import"""
    from models.review import Review
    models = {
        "Review": Review
    }
    return models
# #!/usr/bin/env python3
# """Module to define a serialization/deserialization engine
# that saves objects to a file"""
# import json
# from os import path as os_path


# class FileStorage:
# """class that defines a file storage engine
# for JSON serialization"""
# __file_path = "backend/reviews.json"
# __objects = {}

# def all(self) -> dict:
# """returns __objects"""
# return FileStorage.__objects

# def new(self, obj) -> None:
# """sets a new obj in __objects"""
# FileStorage.__objects[
# f"{obj.__class__.__name__}.{obj.id}"] = obj

# def save(self):
# """serializes __objects to a JSON file in __file_path"""
# with open(FileStorage.__file_path, 'w') as f:
# temp = {}
# temp.update(FileStorage.__objects)
# for key, val in temp.items():
# temp[key] = val.dict()
# json.dump(temp, f)

# def reload(self):
# """deserializes a json file at __file_path and save
# it in __objects"""
# models = import_models()
# if os_path.exists(FileStorage.__file_path):
# with open(f"{FileStorage.__file_path}", "r") as f:
# json_str = f.read()
# if len(json_str) == 0:
# return
# loaded_dict = json.loads(json_str)
# FileStorage.__objects.clear()
# for key, obj_dict in loaded_dict.items():
# obj_class = models[key.split(".")[0]]
# FileStorage.__objects[key] = obj_class(**obj_dict)


# def import_models():
# """imports the modules locally when called
# to avoid circular import"""
# from models.review import Review
# models = {
# "Review": Review
# }
# return models
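For context on the engine being commented out above: FileStorage keys every object as <ClassName>.<id>, calls .dict() on each object when serializing to backend/reviews.json, and rebuilds instances from keyword arguments on reload via import_models(). A rough usage sketch of that round-trip; FakeReview and the import path are stand-ins for illustration only (the real engine imports models.review.Review).

from controllers.file_storage import FileStorage  # assumed import path


class FakeReview:
    """Stand-in for models.review.Review: needs an id and a dict() method."""

    def __init__(self, id, text=""):
        self.id = id
        self.text = text

    def dict(self):
        return {"id": self.id, "text": self.text}


storage = FileStorage()
storage.new(FakeReview(id="1", text="Transactions reconciled correctly."))
storage.save()        # writes {"FakeReview.1": {...}} to backend/reviews.json (directory assumed to exist)
print(storage.all())  # {"FakeReview.1": <FakeReview instance>}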
132 changes: 66 additions & 66 deletions Backend/controllers/matching.py
@@ -1,80 +1,80 @@
#!/usr/bin/python3
""" DOCUMENT MATCHING MODULE """
import json
import pandas as pd
import pickle
import warnings
# #!/usr/bin/python3
# """ DOCUMENT MATCHING MODULE """
# import json
# import pandas as pd
# import pickle
# import warnings

from .convert_file import convert_file
from sentence_transformers import SentenceTransformer, util
import torch
# from .convert_file import convert_file
# from sentence_transformers import SentenceTransformer, util
# import torch

warnings.filterwarnings("ignore")
# warnings.filterwarnings("ignore")

try:
    with open('embedder.pkl', 'rb') as f:
        embedder = pickle.load(f)
except BaseException:
    embedder = SentenceTransformer('msmarco-distilbert-base-tas-b')
    with open('embedder.pkl', 'wb') as f:
        pickle.dump(embedder, f)
# try:
# with open('embedder.pkl', 'rb') as f:
# embedder = pickle.load(f)
# except BaseException:
# embedder = SentenceTransformer('msmarco-distilbert-base-tas-b')
# with open('embedder.pkl', 'wb') as f:
# pickle.dump(embedder, f)


def bertmatch(file1, file2):
    """Matches similar transactions in two documents
# def bertmatch(file1, file2):
# """Matches similar transactions in two documents

    Args:
        file1: first document uploaded
        file2: second document uploaded
# Args:
# file1: first document uploaded
# file2: second document uploaded

    Return:
        object: json
    """
# Return:
# object: json
# """

    records_table = convert_file(file2)
    records_table = pd.read_json(records_table)
    records_table['corpus'] = records_table[records_table.columns].apply(
        lambda row: ' '.join(row.values.astype(str)), axis=1)
    corpus = records_table['corpus'].to_list()
    corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
# records_table = convert_file(file2)
# records_table = pd.read_json(records_table)
# records_table['corpus'] = records_table[records_table.columns].apply(
# lambda row: ' '.join(row.values.astype(str)), axis=1)
# corpus = records_table['corpus'].to_list()
# corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)

    statement_table = convert_file(file1)
    statement_table = pd.read_json(statement_table)
    queries = []
    for i in statement_table.index:
        queries.append(" ".join(list(statement_table.loc[i].astype(str))))
    # Query sentences:
# statement_table = convert_file(file1)
# statement_table = pd.read_json(statement_table)
# queries = []
# for i in statement_table.index:
# queries.append(" ".join(list(statement_table.loc[i].astype(str))))
# # Query sentences:

    response = []
    pool = {}
    # Find the closest 5 sentences of the corpus for each query sentence based
    # on cosine similarity
    top_k = min(1, len(corpus))
    for i, query in enumerate(queries):
        query_embedding = embedder.encode(query, convert_to_tensor=True)
# response = []
# pool = {}
# # Find the closest 5 sentences of the corpus for each query sentence based
# # on cosine similarity
# top_k = min(1, len(corpus))
# for i, query in enumerate(queries):
# query_embedding = embedder.encode(query, convert_to_tensor=True)

        # We use cosine-similarity and torch.topk to find the highest 5 scores
        cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
        top_results = torch.topk(cos_scores, k=top_k)
# # We use cosine-similarity and torch.topk to find the highest 5 scores
# cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
# top_results = torch.topk(cos_scores, k=top_k)

        # print("\n\n======================\n\n")
        # print("Query:", query)
        # print("\nTop 5 most similar sentences in corpus:")
# # print("\n\n======================\n\n")
# # print("Query:", query)
# # print("\nTop 5 most similar sentences in corpus:")

        for score, idx in zip(top_results[0], top_results[1]):
            x = records_table.loc[idx.tolist()].to_dict()
            del x['corpus']
            if score > 0.5999:
                pool['Matched'] = True
                pool['Matched_details'] = [x]
                pool["Certainty"] = "{:.4f}".format(score)
            else:
                pool['Matched'] = False
                pool['Matched_details'] = [{i: "" for i in x}]
                pool["Certainty"] = ""
        x = statement_table.loc[i].to_dict()
        x.update(pool)
        response.append(x)
# for score, idx in zip(top_results[0], top_results[1]):
# x = records_table.loc[idx.tolist()].to_dict()
# del x['corpus']
# if score > 0.5999:
# pool['Matched'] = True
# pool['Matched_details'] = [x]
# pool["Certainty"] = "{:.4f}".format(score)
# else:
# pool['Matched'] = False
# pool['Matched_details'] = [{i: "" for i in x}]
# pool["Certainty"] = ""
# x = statement_table.loc[i].to_dict()
# x.update(pool)
# response.append(x)

    json_object = json.dumps(response, default=str, indent=4)
    return json_object
# json_object = json.dumps(response, default=str, indent=4)
# return json_object
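The matching logic retired above reduces to: join each record row's cells into one string, embed records and statement rows with msmarco-distilbert-base-tas-b, take the single best cosine-similarity hit per statement row (top_k is min(1, len(corpus))), and accept it only when the score exceeds 0.5999. A stripped-down sketch of that core step; the sample rows and column contents are invented for illustration.

from sentence_transformers import SentenceTransformer, util
import torch

embedder = SentenceTransformer('msmarco-distilbert-base-tas-b')

# Each entry mimics a table row joined into a single string, as bertmatch does.
corpus = [
    "2023-01-04 ACME LTD 150.00 invoice",
    "2023-01-05 GLOBEX 89.99 refund",
]
query = "04/01/2023 ACME LIMITED 150.00"

corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
query_embedding = embedder.encode(query, convert_to_tensor=True)

cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
score, idx = torch.topk(cos_scores, k=1)

matched = score.item() > 0.5999  # same acceptance threshold as bertmatch
print(matched, corpus[idx.item()], "{:.4f}".format(score.item()))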