Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/issue 192/fix fulltext api bug 1 #201

Merged
merged 4 commits into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backend-python/media_impact_monitor/fulltext_coding.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from media_impact_monitor.util.llm import completion

system_prompt = "You're a sentiment analysis tool. For a given user input, always return the sentiment of the input. Return -1 for negative, 0 for neutral, and 1 for positive. Before you make your decision, reason about the decision."
system_prompt = """You're a sentiment analysis tool. For a given user input, always return the sentiment of the input. Return -1 for negative, 0 for neutral, and 1 for positive. Before you make your decision, reason about the decision. Stick exactly to the specified JSON schema including the "sentiment_reasoning" and "sentiment" fields."""

tools = [
{
Expand Down
15 changes: 10 additions & 5 deletions backend-python/media_impact_monitor/fulltexts.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,26 +28,31 @@
def get_fulltexts(q: FulltextSearch) -> pd.DataFrame | None:
assert q.topic or q.organizers or q.query or q.event_id
keywords = load_keywords()
num_filters = sum(
[bool(q.topic), bool(q.organizers), bool(q.query), bool(q.event_id)]
)
if num_filters > 1:
raise ValueError(
"Only one of 'topic', 'organizers', 'query', 'event_id' is allowed."
)
if q.topic:
assert q.topic == "climate_change"
assert not q.query and not q.organizers and not q.event_id
assert (
q.topic == "climate_change"
), "Only 'climate_change' is supported as topic."
query = xs(
keywords["climate_science"]
+ keywords["climate_policy"]
+ keywords["climate_urgency"],
q.media_source,
)
if q.organizers:
assert not q.topic and not q.query and not q.event_id
for org in q.organizers:
assert org in climate_orgs, f"Unknown organization: {org}"
orgs = add_quotes(add_aliases(q.organizers))
query = xs_with_ys(orgs, keywords["activism"], q.media_source)
if q.query:
assert not q.topic and not q.organizers and not q.event_id
query = q.query
if q.event_id:
assert not q.topic and not q.query and not q.organizers
events = get_events_by_id([q.event_id])
assert len(events) == 1
event = events.iloc[0]
Expand Down
35 changes: 29 additions & 6 deletions backend-python/media_impact_monitor/fulltexts_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,32 @@ def test_get_fulltexts_for_event():
assert (texts["date"] <= date(2024, 5, 18)).all()


# def test_get_mediacloud_fulltexts():
# start_date = date(2024, 5, 20)
# query = '"letzte generation"'
# fulltexts = get_mediacloud_fulltexts(
# query=query, start_date=start_date, countries=["Germany"]
# )
def test_get_fulltexts_with_too_many_params():
    """Supplying two filters at once (topic and event_id) must raise ValueError."""
    expected_message = (
        "Only one of 'topic', 'organizers', 'query', 'event_id' is allowed."
    )
    with pytest.raises(ValueError) as excinfo:
        search = FulltextSearch(
            media_source="news_online",
            topic="climate_change",
            start_date=date(2023, 1, 1),
            end_date=date(2024, 1, 31),
            event_id="adb689988aa3e61021da64570bda6d95",
        )
        get_fulltexts(search)
    # The captured exception is only available once the context manager exits.
    assert str(excinfo.value) == expected_message


def test_get_fulltexts_for_climate_change():
    """Check that a topic search returns fulltexts dated inside the requested range.

    Queries the "climate_change" topic for a two-day window and asserts that
    a non-empty result comes back whose "date" values all lie within that
    window (inclusive on both ends).
    """
    first_day = date(2023, 1, 1)
    last_day = date(2023, 1, 2)
    texts = get_fulltexts(
        FulltextSearch(
            media_source="news_online",
            topic="climate_change",
            start_date=first_day,
            end_date=last_day,
        )
    )
    assert texts is not None
    assert len(texts) > 0
    # Iterating a DataFrame yields its column labels, not its rows, so the
    # range check has to be done on the "date" column itself.
    assert (texts["date"] >= first_day).all()
    assert (texts["date"] <= last_day).all()
4 changes: 4 additions & 0 deletions backend-python/media_impact_monitor/types_.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,10 @@ class PolicySearch(BaseModel):


class FulltextSearch(BaseModel):
"""
You can set parameters for media_source and date_range, and filter by one of the following: topic, organizers, query, or event_id. For now you cannot combine the latter filters, since they all affect the query in different ways.
"""

media_source: MediaSource = Field(
description="The data source for the media data (i.e., online news, print news, etc.)."
)
Expand Down
Loading