Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/issue 192/fix fulltext api bug 1 #201

Merged
merged 4 commits into from
Jul 18, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix(fulltext api): make clearer error message which combinations of f…
…ilters are allowed, and add test
davidpomerenke committed Jul 18, 2024
commit 97c700e4f9d428ee044eeaca48d702846c8e9dfc
15 changes: 10 additions & 5 deletions backend-python/media_impact_monitor/fulltexts.py
Original file line number Diff line number Diff line change
@@ -28,26 +28,31 @@
def get_fulltexts(q: FulltextSearch) -> pd.DataFrame | None:
assert q.topic or q.organizers or q.query or q.event_id
keywords = load_keywords()
num_filters = sum(
[bool(q.topic), bool(q.organizers), bool(q.query), bool(q.event_id)]
)
if num_filters > 1:
raise ValueError(
"Only one of 'topic', 'organizers', 'query', 'event_id' is allowed."
)
if q.topic:
assert q.topic == "climate_change"
assert not q.query and not q.organizers and not q.event_id
assert (
q.topic == "climate_change"
), "Only 'climate_change' is supported as topic."
query = xs(
keywords["climate_science"]
+ keywords["climate_policy"]
+ keywords["climate_urgency"],
q.media_source,
)
if q.organizers:
assert not q.topic and not q.query and not q.event_id
for org in q.organizers:
assert org in climate_orgs, f"Unknown organization: {org}"
orgs = add_quotes(add_aliases(q.organizers))
query = xs_with_ys(orgs, keywords["activism"], q.media_source)
if q.query:
assert not q.topic and not q.organizers and not q.event_id
query = q.query
if q.event_id:
assert not q.topic and not q.query and not q.organizers
events = get_events_by_id([q.event_id])
assert len(events) == 1
event = events.iloc[0]
35 changes: 29 additions & 6 deletions backend-python/media_impact_monitor/fulltexts_test.py
Original file line number Diff line number Diff line change
@@ -39,9 +39,32 @@ def test_get_fulltexts_for_event():
assert (texts["date"] <= date(2024, 5, 18)).all()


# def test_get_mediacloud_fulltexts():
# start_date = date(2024, 5, 20)
# query = '"letzte generation"'
# fulltexts = get_mediacloud_fulltexts(
# query=query, start_date=start_date, countries=["Germany"]
# )
def test_get_fulltexts_with_too_many_params():
    """Combining two filters (topic + event_id) must raise a ValueError.

    The error message is compared verbatim so that API users always get the
    exact list of mutually exclusive filter names.
    """
    expected_message = (
        "Only one of 'topic', 'organizers', 'query', 'event_id' is allowed."
    )
    with pytest.raises(ValueError) as excinfo:
        # Both `topic` and `event_id` are set, which is one filter too many.
        get_fulltexts(
            FulltextSearch(
                media_source="news_online",
                topic="climate_change",
                start_date=date(2023, 1, 1),
                end_date=date(2024, 1, 31),
                event_id="adb689988aa3e61021da64570bda6d95",
            )
        )
    raised_message = str(excinfo.value)
    assert raised_message == expected_message


def test_get_fulltexts_for_climate_change():
    """Fetch climate-change fulltexts for a two-day window and check the dates.

    The result must be a non-empty table whose dates all lie within the
    requested range (both bounds inclusive).
    """
    start = date(2023, 1, 1)
    end = date(2023, 1, 2)
    texts = get_fulltexts(
        FulltextSearch(
            media_source="news_online",
            topic="climate_change",
            start_date=start,
            end_date=end,
        )
    )
    assert texts is not None
    assert len(texts) > 0
    # Iterating a DataFrame yields its column labels, not its rows, so the
    # date bounds are checked on the "date" column itself (same pattern as
    # the event-based test in this file).
    assert (texts["date"] >= start).all()
    assert (texts["date"] <= end).all()
4 changes: 4 additions & 0 deletions backend-python/media_impact_monitor/types_.py
Original file line number Diff line number Diff line change
@@ -156,6 +156,10 @@ class PolicySearch(BaseModel):


class FulltextSearch(BaseModel):
"""
You can set parameters for media_source and date_range, and filter by one of the following: topic, organizers, query, or event_id. For now you cannot combine the latter filters, since they all affect the query in different ways.
"""

media_source: MediaSource = Field(
description="The data source for the media data (i.e., online news, print news, etc.)."
)