diff --git a/.vscode/launch.json b/.vscode/launch.json index a722c7f9..ade89e71 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -19,6 +19,7 @@ "media_impact_monitor.api:app", "--host=0.0.0.0", "--port=8000", + "--reload", ], "jinja": true } diff --git a/.vscode/settings.json b/.vscode/settings.json index ae896538..1f46fc1a 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,8 +1,13 @@ { "python.defaultInterpreterPath": "${workspaceFolder}/backend-python/.venv", - "workbench.colorCustomizations": { - "titleBar.activeBackground": "#042F2D", - "titleBar.activeForeground": "#ffffff" - }, - "jupyter.notebookFileRoot": "${workspaceFolder}/backend-python/media_impact_monitor" -} + "workbench.colorCustomizations": { + "titleBar.activeBackground": "#042F2D", + "titleBar.activeForeground": "#ffffff" + }, + "jupyter.notebookFileRoot": "${workspaceFolder}/backend-python/media_impact_monitor", + "python.testing.pytestArgs": [ + "backend-python/media_impact_monitor" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} \ No newline at end of file diff --git a/backend-python/media_impact_monitor/data_loaders/protest/acled.py b/backend-python/media_impact_monitor/data_loaders/protest/acled.py index a578ecae..2b54006c 100644 --- a/backend-python/media_impact_monitor/data_loaders/protest/acled.py +++ b/backend-python/media_impact_monitor/data_loaders/protest/acled.py @@ -1,8 +1,6 @@ -import os from datetime import date import pandas as pd -from dotenv import load_dotenv from media_impact_monitor.data_loaders.protest.acled_size import ( get_size_number, @@ -12,8 +10,6 @@ from media_impact_monitor.util.date import verify_dates from media_impact_monitor.util.env import ACLED_EMAIL, ACLED_KEY -load_dotenv() - info = """ ACLED (Armed Conflict Location & Event Data Project) is a project that tracks political violence and protest events around the world. The data is collected from reports by local and international news sources, and is updated on a weekly basis. The ACLED API provides access to the data. diff --git a/backend-python/media_impact_monitor/impact.py b/backend-python/media_impact_monitor/impact.py index fb0b131c..b9fd7647 100644 --- a/backend-python/media_impact_monitor/impact.py +++ b/backend-python/media_impact_monitor/impact.py @@ -33,6 +33,13 @@ def get_impact(q: ImpactSearch) -> Impact: end_date=q.end_date, ) ) + n_event_days = events["date"].nunique() + if n_event_days < 5: + return Impact( + method_applicability=False, + method_limitations=["Not enough events to estimate impact."], + impact_estimates=None, + ) q.impacted_trend.start_date = q.start_date q.impacted_trend.end_date = q.end_date trends = get_trend(TrendSearch(**dict(q.impacted_trend))) @@ -55,7 +62,7 @@ def get_impact(q: ImpactSearch) -> Impact: assert ( len(set([str(lims) for lims in lims_list])) == 1 ), "All topics should have same limitations." - n_days = 14 - 1 + n_days = 7 - 1 return Impact( method_applicability=applicabilities[0], method_limitations=lims_list[0], diff --git a/backend-python/media_impact_monitor/impact_estimators/time_series_regression.py b/backend-python/media_impact_monitor/impact_estimators/time_series_regression.py index 6757ed09..78123874 100644 --- a/backend-python/media_impact_monitor/impact_estimators/time_series_regression.py +++ b/backend-python/media_impact_monitor/impact_estimators/time_series_regression.py @@ -20,7 +20,7 @@ def add_emws(df: pd.DataFrame, spans=[1, 2, 7, 30, 90, 365]): """Add new columns with exponentially weighted moving averages.""" emws = pd.DataFrame( { - f"{col}_emw{i}": df.shift(1).ewm(span=i).mean().iloc[:, 0] + f"{col}_emw{i}": df.shift(1).ewm(halflife=i).mean().iloc[:, 0] for i in spans for col in df.columns } @@ -46,10 +46,10 @@ def regress( """Get regression result where the outcome is `day` days after the treatment.""" lags = range(1, lags + 1) media_df = pd.DataFrame(media_df, columns=["count"]) - protest_df = add_lags(protest_df, lags=lags) - media_df = add_lags(media_df, lags=lags) + # protest_df = add_lags(protest_df, lags=[]) + media_df = add_lags(media_df, lags=[4,5,6,7,8]) # protest_df = add_emws(protest_df) - media_df = add_emws(media_df, spans=[7, 30, 90]) + # media_df = add_emws(media_df, spans=[14]) df = pd.concat([protest_df, media_df], axis=1) df = add_weekday_dummies(df) treatment = "protest" @@ -63,16 +63,20 @@ def regress( else: df[outcome] = df[outcome].rolling(day + 1).sum() df = df.dropna() + placebo = False + if placebo: + df[treatment] = df.sample(frac=1)[treatment].to_list() X = df.drop(columns=[outcome]) y = df[outcome] model = sm.OLS(y, sm.add_constant(X)) model = model.fit(cov_type="HC3") + alpha = 0.1 return { "date": day, "mean": model.params[treatment], "p": model.pvalues[treatment], - "ci_lower": model.conf_int()[0][treatment], - "ci_upper": model.conf_int()[1][treatment], + "ci_lower": model.conf_int(alpha=alpha)[0][treatment], + "ci_upper": model.conf_int(alpha=alpha)[1][treatment], } diff --git a/backend-python/media_impact_monitor/issue_keywords.yaml b/backend-python/media_impact_monitor/issue_keywords.yaml index 05d88d57..3a384218 100644 --- a/backend-python/media_impact_monitor/issue_keywords.yaml +++ b/backend-python/media_impact_monitor/issue_keywords.yaml @@ -15,15 +15,18 @@ activism: - mahnwache - hungerstreik - ziviler ungehorsam -climate_science: +climate_general: - klimawandel - klimaerwärmung - erderwärmung - klimaschutz - - klimagerechtigkeit - - klimapolitik - - klimaneutral* +climate_science: + - klimaforsch* + - klimawissenschaft* + - erderwärmung + - ipcc climate_policy: + - klimapoliti* - klimaneutral* - klimaziel* - klimaschutzpaket @@ -41,6 +44,7 @@ climate_policy: - neun-euro-ticket - vergesellschaftung - schuldenschnitt + - klimagerechtigkeit climate_urgency: - klimakrise - klimakatastrophe diff --git a/backend-python/media_impact_monitor/trends/keyword_trend.py b/backend-python/media_impact_monitor/trends/keyword_trend.py index ac414023..94e8fa72 100644 --- a/backend-python/media_impact_monitor/trends/keyword_trend.py +++ b/backend-python/media_impact_monitor/trends/keyword_trend.py @@ -1,5 +1,3 @@ -from datetime import date - import pandas as pd import yaml @@ -9,7 +7,6 @@ from media_impact_monitor.data_loaders.news_print.genios import get_genios_counts from media_impact_monitor.data_loaders.web.google_trends import get_google_trends_counts from media_impact_monitor.types_ import TrendSearch -from media_impact_monitor.util.cache import cache from media_impact_monitor.util.paths import src @@ -56,19 +53,19 @@ def load_keywords(): def topic_queries(media_source: str) -> dict[str, str]: keywords = load_keywords() keyword_queries = { - "science": xs(keywords["climate_science"], media_source), - "policy": xs(keywords["climate_policy"], media_source), - "urgency": xs(keywords["climate_urgency"], media_source), - "all_excl_activism": xs_without_ys( - keywords["climate_science"] - + keywords["climate_policy"] - + keywords["climate_urgency"], - keywords["activism"], - media_source, - ), + "climate policy": xs(keywords["climate_policy"], media_source), + "climate science": xs(keywords["climate_science"], media_source), + "climate crisis framing": xs(keywords["climate_urgency"], media_source), + # "all_excl_activism": xs_without_ys( + # keywords["climate_science"] + # + keywords["climate_policy"] + # + keywords["climate_urgency"], + # keywords["activism"], + # media_source, + # ), } if media_source != "web_google": - keyword_queries["activism"] = xs_with_ys( + keyword_queries["climate activism"] = xs_with_ys( keywords["climate_science"] + keywords["climate_policy"] + keywords["climate_urgency"], diff --git a/backend-python/media_impact_monitor/trends/keyword_trend_test.py b/backend-python/media_impact_monitor/trends/keyword_trend_test.py index e9deefd3..7292b47b 100644 --- a/backend-python/media_impact_monitor/trends/keyword_trend_test.py +++ b/backend-python/media_impact_monitor/trends/keyword_trend_test.py @@ -4,13 +4,14 @@ def test_topic_queries(): for media_source in ["news_online", "news_print"]: queries = topic_queries(media_source) - assert queries["science"].startswith( - "klimawandel OR klimaerwärmung OR erderwärmung" + assert queries["climate science"].startswith( + 'klimaforsch* OR klimawissenschaft*' ) - assert queries["policy"].startswith( - "klimaneutral* OR klimaziel* OR klimaschutzpaket" + assert queries["climate policy"].startswith( + "klimapoliti* OR klimaneutral* OR klimaziel*" ) - assert '"erneuerbare energie*"' in queries["policy"] + assert '"erneuerbare energie*"' in queries["climate policy"] queries = topic_queries("web_google") - assert "-\\*protest*" in queries["all_excl_activism"] - assert "+klimawandel" in queries["all_excl_activism"] + assert "+erderwärmung" in queries["climate science"] + # assert "-\\*protest*" in queries["all_excl_activism"] + # assert "+klimawandel" in queries["all_excl_activism"] diff --git a/backend-python/media_impact_monitor/util/env.py b/backend-python/media_impact_monitor/util/env.py index 07a4af9c..6964a7ba 100644 --- a/backend-python/media_impact_monitor/util/env.py +++ b/backend-python/media_impact_monitor/util/env.py @@ -2,7 +2,10 @@ from dotenv import load_dotenv -load_dotenv() +# read environment variables from .env file +# see .env.example for the required variables +# override any existing environment variables +load_dotenv(override=True) ACLED_EMAIL = environ["ACLED_EMAIL"] ACLED_KEY = environ["ACLED_KEY"] diff --git a/frontend-observable/package-lock.json b/frontend-observable/package-lock.json index 220986b0..3e76429c 100644 --- a/frontend-observable/package-lock.json +++ b/frontend-observable/package-lock.json @@ -1,5 +1,5 @@ { - "name": "frontend-alpha", + "name": "frontend-observable", "lockfileVersion": 3, "requires": true, "packages": { diff --git a/frontend-observable/src/impacts.md b/frontend-observable/src/impacts.md index b19f760f..3c5e8e7d 100644 --- a/frontend-observable/src/impacts.md +++ b/frontend-observable/src/impacts.md @@ -12,7 +12,8 @@ let impact = await queryApi('impact', { media_source: 'news_print', topic: 'climate_change' }, - organizer: 'Last Generation (Germany)', + organizer: 'Fridays for Future', + start_date: '2020-04-10', end_date: '2022-04-30' }) display(impact)