Skip to content

Commit

Permalink
Merge pull request #50 from the-deep-nlp/fix/add-data-in-summarization
Browse files Browse the repository at this point in the history
Fixes mock data in text extraction, topicmodel and summarization.
  • Loading branch information
sudan45 authored Jun 28, 2024
2 parents b369256 + dfa4263 commit 08fdcb8
Showing 1 changed file with 112 additions and 8 deletions.
120 changes: 112 additions & 8 deletions analysis_module/mockserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,10 @@
logger.setLevel(logging.INFO)


def get_entries_data(url: str, timeout: int = 30) -> Any:
    """Fetch the JSON document served at ``url`` and return it decoded.

    Args:
        url: Address to send the HTTP GET request to.
        timeout: Value forwarded to ``requests.get`` as its ``timeout``
            argument (seconds). Defaults to 30 so a stalled server cannot
            block the caller indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.get(url, timeout=timeout)
    # Surface HTTP errors here instead of decoding an error body and handing
    # it back to the caller as if it were the requested data.
    response.raise_for_status()
    return response.json()


def save_data_local_and_get_url(dir_name: str, client_id: str, data: Any) -> str:
Expand Down Expand Up @@ -132,7 +131,8 @@ def process_summarization(body: dict) -> Any:
callback_url = request_body.get("callback_url")

try:
excerpts = [x["excerpt"] for x in get_entries_data(entries_url)]
input_payload = get_entries_data(entries_url)
excerpts = [x["excerpt"] for x in input_payload["data"]]
except Exception:
send_callback_url_request(
callback_url=callback_url,
Expand All @@ -142,7 +142,14 @@ def process_summarization(body: dict) -> Any:
)
return

data = " ".join(["This is a fake response.\n"] + excerpts)
summary_data = " ".join(["This is a fake response.\n"] + excerpts)
analytical_statement_data = " ".join(["This is autogenerated Analytical Statement.\n"] + excerpts)
info_gaps_data = " ".join(["This is autogenerated info gaps.\n"] + excerpts)
data = {
"summary": summary_data,
"analytical_statement": analytical_statement_data,
"info_gaps": info_gaps_data
}
filepath = save_data_local_and_get_url(
dir_name="summarization", client_id=client_id, data=data
)
Expand Down Expand Up @@ -178,7 +185,10 @@ def process_topicmodeling(body) -> Any:
]

try:
excerpt_ids = [x["entry_id"] for x in get_entries_data(entries_url)]
input_payload = get_entries_data(entries_url)
excerpt_ids = [x["entry_id"] for x in input_payload["data"]]
# To get the list of unique tags
# tags = input_payload["tags"]
except Exception:
send_callback_url_request(
callback_url=callback_url,
Expand Down Expand Up @@ -301,26 +311,120 @@ def process_extraction_mock(body) -> Any:
client_id = document["client_id"]
text_extraction_id = "06b46e2a-00b6-4676-a375-8a7b938a17c6"
random_extracted_text = """
********* [PAGE 1 START] *********
This is some random extracted text.
On Human Rights Day, observed annually on December 10th, Palestinian human rights organizations—The Palestinian
Center for Human Rights, Al Mezan, and Al-Haq—call on the international community to promptly intervene for an
immediate ceasefire, pressure Israel to halt its aggression and genocide in the Gaza Strip and its violations
in the entire occupied Palestinian territory, and ensure accountability and justice.
" ------------------------------------------------------------------------------ "
Given the high concentration of children inRafah- including many who are highly
vulnerable and at the edge of survival- as well as the likely intensity of the
violence,with potential evacuation corridors likely mined or littered with unexploded
ordnance; and shelter and services in areas for elocation very likely to be
limited-UNicEF is warning of a further catastrophe for children,with military
operations resulting in very high civilian casualties and the few remaining basic
services and infrastructure they need to survive being totally destroyed.
" ------------------------------------------------------------------------------ "
As the world marks Human Rights Day today, commemorating the adoption of the Universal
Declaration of Human Rights (UDHR) by the United Nations General Assembly in 1948;
Israel blatantly and systematically violates the majority of the declaration's articles.
It subjects 2.3 million Palestinians in Gaza to a genocidal campaign while
enjoying complete immunity and support from the United States. Despite the US providing Israel with weapons and
munitions and vetoing the UN Security Council resolution calling for an immediate ceasefire in Gaza, the
international community has yet to take effective positions to halt the genocide of an entire people.
********* [PAGE 1 END] *********
********* [PAGE 2 START] *********
In Burundi, around 32,000 refugees-nearly half of the refugee population in the
country -are living in areas affected by the floods, with 500 of them requiring
urgent assistance. In the capital, Bujumbura,refugee families along with many
Burundians,including elderly people, have had to elocate multiple times as water
levels continue to rise.
********* [PAGE 2 END] *********
"""
filepath = save_data_local_and_get_url(
"extraction", client_id, random_extracted_text
)
tables_path = [
{
"page_number": 1,
"order": 0,
"image_link": "https://text-extraction-mock-data.s3.amazonaws.com/timetable.png",
"content_link": "https://text-extraction-mock-data.s3.amazonaws.com/table_timetable.xlsx"
},
{
"page_number": 1,
"order": 1,
"image_link": "https://text-extraction-mock-data.s3.amazonaws.com/risk_analysis.png",
"content_link": "https://text-extraction-mock-data.s3.amazonaws.com/risk_analysis.xlsx"
},
{
"page_number": 2,
"order": 0,
"image_link": "https://text-extraction-mock-data.s3.amazonaws.com/table2.png",
"content_link": "https://text-extraction-mock-data.s3.amazonaws.com/table2.xlsx"
},
{
"page_number": 2,
"order": 1,
"image_link": "https://text-extraction-mock-data.s3.amazonaws.com/table2.png",
"content_link": "https://text-extraction-mock-data.s3.amazonaws.com/table2.xlsx"
},
{
"page_number": 3,
"order": 0,
"image_link": "https://text-extraction-mock-data.s3.amazonaws.com/table2.png",
"content_link": "https://text-extraction-mock-data.s3.amazonaws.com/table2.xlsx"
},
{
"page_number": 3,
"order": 1,
"image_link": "https://text-extraction-mock-data.s3.amazonaws.com/table2.png",
"content_link": "https://text-extraction-mock-data.s3.amazonaws.com/table2.xlsx"
},
{
"page_number": 3,
"order": 2,
"image_link": "https://text-extraction-mock-data.s3.amazonaws.com/table2.png",
"content_link": "https://text-extraction-mock-data.s3.amazonaws.com/table2.xlsx"
},
{
"page_number": 4,
"order": 0,
"image_link": "https://text-extraction-mock-data.s3.amazonaws.com/table2.png",
"content_link": "https://text-extraction-mock-data.s3.amazonaws.com/table2.xlsx"
}
]
images_path = [
{
"page_number": 1,
"images": [
"https://text-extraction-mock-data.s3.amazonaws.com/rose.jpg",
"https://text-extraction-mock-data.s3.amazonaws.com/USAID1619883462_1024.jpg",
"https://text-extraction-mock-data.s3.amazonaws.com/USAID1619883462_1024.jpg",
"https://text-extraction-mock-data.s3.amazonaws.com/USAID1619883462_1024.jpg",
"https://text-extraction-mock-data.s3.amazonaws.com/USAID1619883462_1024.jpg",
"https://text-extraction-mock-data.s3.amazonaws.com/USAID1619883462_1024.jpg",
"https://text-extraction-mock-data.s3.amazonaws.com/USAID1619883462_1024.jpg",
"https://text-extraction-mock-data.s3.amazonaws.com/USAID1619883462_1024.jpg",
"https://text-extraction-mock-data.s3.amazonaws.com/USAID1619883462_1024.jpg",
"https://text-extraction-mock-data.s3.amazonaws.com/USAID1619883462_1024.jpg"
]
},
{
"page_number": 2,
"images": [
"https://text-extraction-mock-data.s3.amazonaws.com/forum.png",
"https://text-extraction-mock-data.s3.amazonaws.com/USAID1619883462_1024.jpg",
"https://text-extraction-mock-data.s3.amazonaws.com/USAID1619883462_1024.jpg"
]
}
]
callback_data = {
"text_path": filepath,
"images_path": [],
"images_path": images_path,
"total_pages": 1,
"tables_path": tables_path,
"total_words_count": 50,
"status": NLPRequest.RequestStatus.SUCCESS,
"client_id": client_id,
Expand Down

0 comments on commit 08fdcb8

Please sign in to comment.