Skip to content

Commit

Permalink
Merge pull request #204 from SocialChangeLab/fix-impact
Browse files Browse the repository at this point in the history
Fix impact
  • Loading branch information
davidpomerenke authored Jul 21, 2024
2 parents 76f0b97 + 5060dda commit dfd2afd
Show file tree
Hide file tree
Showing 11 changed files with 64 additions and 45 deletions.
1 change: 1 addition & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"media_impact_monitor.api:app",
"--host=0.0.0.0",
"--port=8000",
"--reload",
],
"jinja": true
}
Expand Down
17 changes: 11 additions & 6 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
{
"python.defaultInterpreterPath": "${workspaceFolder}/backend-python/.venv",
"workbench.colorCustomizations": {
"titleBar.activeBackground": "#042F2D",
"titleBar.activeForeground": "#ffffff"
},
"jupyter.notebookFileRoot": "${workspaceFolder}/backend-python/media_impact_monitor"
}
"workbench.colorCustomizations": {
"titleBar.activeBackground": "#042F2D",
"titleBar.activeForeground": "#ffffff"
},
"jupyter.notebookFileRoot": "${workspaceFolder}/backend-python/media_impact_monitor",
"python.testing.pytestArgs": [
"backend-python/media_impact_monitor"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import os
from datetime import date

import pandas as pd
from dotenv import load_dotenv

from media_impact_monitor.data_loaders.protest.acled_size import (
get_size_number,
Expand All @@ -12,8 +10,6 @@
from media_impact_monitor.util.date import verify_dates
from media_impact_monitor.util.env import ACLED_EMAIL, ACLED_KEY

load_dotenv()

info = """
ACLED (Armed Conflict Location & Event Data Project) is a project that tracks political violence and protest events around the world. The data is collected from reports by local and international news sources, and is updated on a weekly basis. The ACLED API provides access to the data.
Expand Down
9 changes: 8 additions & 1 deletion backend-python/media_impact_monitor/impact.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ def get_impact(q: ImpactSearch) -> Impact:
end_date=q.end_date,
)
)
n_event_days = events["date"].nunique()
if n_event_days < 5:
return Impact(
method_applicability=False,
method_limitations=["Not enough events to estimate impact."],
impact_estimates=None,
)
q.impacted_trend.start_date = q.start_date
q.impacted_trend.end_date = q.end_date
trends = get_trend(TrendSearch(**dict(q.impacted_trend)))
Expand All @@ -55,7 +62,7 @@ def get_impact(q: ImpactSearch) -> Impact:
assert (
len(set([str(lims) for lims in lims_list])) == 1
), "All topics should have same limitations."
n_days = 14 - 1
n_days = 7 - 1
return Impact(
method_applicability=applicabilities[0],
method_limitations=lims_list[0],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def add_emws(df: pd.DataFrame, spans=[1, 2, 7, 30, 90, 365]):
"""Add new columns with exponentially weighted moving averages."""
emws = pd.DataFrame(
{
f"{col}_emw{i}": df.shift(1).ewm(span=i).mean().iloc[:, 0]
f"{col}_emw{i}": df.shift(1).ewm(halflife=i).mean().iloc[:, 0]
for i in spans
for col in df.columns
}
Expand All @@ -46,10 +46,10 @@ def regress(
"""Get regression result where the outcome is `day` days after the treatment."""
lags = range(1, lags + 1)
media_df = pd.DataFrame(media_df, columns=["count"])
protest_df = add_lags(protest_df, lags=lags)
media_df = add_lags(media_df, lags=lags)
# protest_df = add_lags(protest_df, lags=[])
media_df = add_lags(media_df, lags=[4,5,6,7,8])
# protest_df = add_emws(protest_df)
media_df = add_emws(media_df, spans=[7, 30, 90])
# media_df = add_emws(media_df, spans=[14])
df = pd.concat([protest_df, media_df], axis=1)
df = add_weekday_dummies(df)
treatment = "protest"
Expand All @@ -63,16 +63,20 @@ def regress(
else:
df[outcome] = df[outcome].rolling(day + 1).sum()
df = df.dropna()
placebo = False
if placebo:
df[treatment] = df.sample(frac=1)[treatment].to_list()
X = df.drop(columns=[outcome])
y = df[outcome]
model = sm.OLS(y, sm.add_constant(X))
model = model.fit(cov_type="HC3")
alpha = 0.1
return {
"date": day,
"mean": model.params[treatment],
"p": model.pvalues[treatment],
"ci_lower": model.conf_int()[0][treatment],
"ci_upper": model.conf_int()[1][treatment],
"ci_lower": model.conf_int(alpha=alpha)[0][treatment],
"ci_upper": model.conf_int(alpha=alpha)[1][treatment],
}


Expand Down
12 changes: 8 additions & 4 deletions backend-python/media_impact_monitor/issue_keywords.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,18 @@ activism:
- mahnwache
- hungerstreik
- ziviler ungehorsam
climate_science:
climate_general:
- klimawandel
- klimaerwärmung
- erderwärmung
- klimaschutz
- klimagerechtigkeit
- klimapolitik
- klimaneutral*
climate_science:
- klimaforsch*
- klimawissenschaft*
- erderwärmung
- ipcc
climate_policy:
- klimapoliti*
- klimaneutral*
- klimaziel*
- klimaschutzpaket
Expand All @@ -41,6 +44,7 @@ climate_policy:
- neun-euro-ticket
- vergesellschaftung
- schuldenschnitt
- klimagerechtigkeit
climate_urgency:
- klimakrise
- klimakatastrophe
Expand Down
25 changes: 11 additions & 14 deletions backend-python/media_impact_monitor/trends/keyword_trend.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from datetime import date

import pandas as pd
import yaml

Expand All @@ -9,7 +7,6 @@
from media_impact_monitor.data_loaders.news_print.genios import get_genios_counts
from media_impact_monitor.data_loaders.web.google_trends import get_google_trends_counts
from media_impact_monitor.types_ import TrendSearch
from media_impact_monitor.util.cache import cache
from media_impact_monitor.util.paths import src


Expand Down Expand Up @@ -56,19 +53,19 @@ def load_keywords():
def topic_queries(media_source: str) -> dict[str, str]:
keywords = load_keywords()
keyword_queries = {
"science": xs(keywords["climate_science"], media_source),
"policy": xs(keywords["climate_policy"], media_source),
"urgency": xs(keywords["climate_urgency"], media_source),
"all_excl_activism": xs_without_ys(
keywords["climate_science"]
+ keywords["climate_policy"]
+ keywords["climate_urgency"],
keywords["activism"],
media_source,
),
"climate policy": xs(keywords["climate_policy"], media_source),
"climate science": xs(keywords["climate_science"], media_source),
"climate crisis framing": xs(keywords["climate_urgency"], media_source),
# "all_excl_activism": xs_without_ys(
# keywords["climate_science"]
# + keywords["climate_policy"]
# + keywords["climate_urgency"],
# keywords["activism"],
# media_source,
# ),
}
if media_source != "web_google":
keyword_queries["activism"] = xs_with_ys(
keyword_queries["climate activism"] = xs_with_ys(
keywords["climate_science"]
+ keywords["climate_policy"]
+ keywords["climate_urgency"],
Expand Down
15 changes: 8 additions & 7 deletions backend-python/media_impact_monitor/trends/keyword_trend_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
def test_topic_queries():
for media_source in ["news_online", "news_print"]:
queries = topic_queries(media_source)
assert queries["science"].startswith(
"klimawandel OR klimaerwärmung OR erderwärmung"
assert queries["climate science"].startswith(
'klimaforsch* OR klimawissenschaft*'
)
assert queries["policy"].startswith(
"klimaneutral* OR klimaziel* OR klimaschutzpaket"
assert queries["climate policy"].startswith(
"klimapoliti* OR klimaneutral* OR klimaziel*"
)
assert '"erneuerbare energie*"' in queries["policy"]
assert '"erneuerbare energie*"' in queries["climate policy"]
queries = topic_queries("web_google")
assert "-\\*protest*" in queries["all_excl_activism"]
assert "+klimawandel" in queries["all_excl_activism"]
assert "+erderwärmung" in queries["climate science"]
# assert "-\\*protest*" in queries["all_excl_activism"]
# assert "+klimawandel" in queries["all_excl_activism"]
5 changes: 4 additions & 1 deletion backend-python/media_impact_monitor/util/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

from dotenv import load_dotenv

load_dotenv()
# Read environment variables from the .env file
# (see .env.example for the required variables).
# override=True: values from .env take precedence over variables already set in the environment.
load_dotenv(override=True)

ACLED_EMAIL = environ["ACLED_EMAIL"]
ACLED_KEY = environ["ACLED_KEY"]
Expand Down
2 changes: 1 addition & 1 deletion frontend-observable/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion frontend-observable/src/impacts.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ let impact = await queryApi('impact', {
media_source: 'news_print',
topic: 'climate_change'
},
organizer: 'Last Generation (Germany)',
organizer: 'Fridays for Future',
start_date: '2020-04-10',
end_date: '2022-04-30'
})
display(impact)
Expand Down

0 comments on commit dfd2afd

Please sign in to comment.