-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(google_trends.py): add google trends data loader
Closes #72
- Loading branch information
1 parent
575b8cb
commit a60354b
Showing
4 changed files
with
232 additions
and
1 deletion.
There are no files selected for viewing
28 changes: 28 additions & 0 deletions
28
backend-python/media_impact_monitor/data_loaders/web/google_trends.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
""" | ||
Documentation of the Google Trends API: | ||
- https://github.com/GeneralMills/pytrends | ||
- https://searchanalysisguide.blogspot.com/2013/04/google-trends-what-is-partial-data.html | ||
There is also regional data, useful for synthetic control. | ||
For the last 90 days, data is also available with daily resolution; otherwise only weekly. | ||
""" | ||
|
||
from time import sleep | ||
|
||
from media_impact_monitor.util.cache import cache | ||
from pytrends.request import TrendReq | ||
|
||
|
||
@cache
def get_google_trends_counts(query: str):
    """Fetch Google Trends search interest for ``query`` in Germany.

    Requests the "today 5-y" timeframe for geo "DE" through pytrends and
    returns only the complete (non-partial) periods.

    Args:
        query: The search term to look up.

    Returns:
        A DataFrame with a single ``count`` column holding the interest
        values returned by pytrends. If Google Trends has no data for the
        query, an empty DataFrame with a ``count`` column is returned.
    """
    trends_client = TrendReq(hl="de-DE", tz=60)
    trends_client.build_payload([query], timeframe="today 5-y", geo="DE")
    raw = trends_client.interest_over_time()
    # Short pause after every request to stay below the rate limit. Once the
    # rate limit has actually been hit, the pytrends README recommends
    # waiting 60 seconds: https://github.com/GeneralMills/pytrends
    sleep(1)
    if raw.empty:
        # pytrends hands back a column-less frame when there is no data, so
        # indexing "isPartial" would raise KeyError; return the expected
        # schema with zero rows instead.
        return raw.reindex(columns=["count"])
    complete = raw[~raw["isPartial"]]
    return complete.drop(columns=["isPartial"]).rename(columns={query: "count"})
15 changes: 15 additions & 0 deletions
15
backend-python/media_impact_monitor/data_loaders/web/google_trends_test.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
from media_impact_monitor.data_loaders.web.google_trends import get_google_trends_counts | ||
|
||
|
||
def test_get_google_trends_counts():
    """Smoke-test ``get_google_trends_counts`` with the query "corona".

    Checks the shape of the returned frame (single ``count`` column, sorted
    ``date`` index) and that the values lie in Google Trends' 0-100 range.
    """
    df = get_google_trends_counts("corona")
    assert not df.empty
    # Compare as a plain list: `Index == list` is element-wise and raises
    # ValueError (instead of failing the assertion) on any column mismatch.
    assert list(df.columns) == ["count"]
    assert df.index.name == "date"
    assert df.index.is_monotonic_increasing
    counts = df["count"]
    # Accept any signed integer width; `dtype == int` depends on the
    # platform's default integer size.
    assert counts.dtype.kind == "i"
    assert counts.min() >= 0
    assert counts.max() >= 0
    # Google Trends scales the peak of the requested window to 100.
    # NOTE(review): this can fail if the peak lies in a dropped partial period.
    assert counts.max() == 100
    assert counts.sum() >= 0
    assert counts.sum() <= 100 * len(df)
Oops, something went wrong.