Skip to content

Commit

Permalink
feat: pargent (#738)
Browse files Browse the repository at this point in the history
* feat: pargent

Signed-off-by: yihong0618 <[email protected]>

* fix: add requirements duckdb

Signed-off-by: yihong0618 <[email protected]>

---------

Signed-off-by: yihong0618 <[email protected]>
  • Loading branch information
yihong0618 authored Nov 19, 2024
1 parent 46473b6 commit 058a222
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 4 deletions.
13 changes: 9 additions & 4 deletions .github/workflows/run_data_sync.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ env:
TITLE: Yihong0618 Running
MIN_GRID_DISTANCE: 10 # change min distance here
TITLE_GRID: Over 10km Runs # also here
GITHUB_NAME: "[email protected]" # default Actions bot you can change it to yours
GITHUB_EMAIL: "GitHub Action" # default Actions bot you can change it to yours

# IGNORE_BEFORE_SAVING: True # if you want to ignore some data before saving, set this to True
IGNORE_START_END_RANGE: 10 # Unit meter
Expand All @@ -41,6 +39,7 @@ env:
SAVE_DATA_IN_GITHUB_CACHE: false # if you deploy on Vercel, check the README
DATA_CACHE_PREFIX: 'track_data'
BUILD_GH_PAGES: true # If you do not need GitHub Pages, please set it to `false`
SAVE_TO_PARQENT: true # If you want to save the data as a Parquet file in the repo, set it to `true`

jobs:
sync:
Expand Down Expand Up @@ -210,11 +209,17 @@ jobs:
python run_page/gen_svg.py --from-db --type circular --use-localtime
python run_page/gen_svg.py --from-db --year $(date +"%Y") --language zh_CN --title "$(date +"%Y") Running" --type github --athlete "${{ env.ATHLETE }}" --special-distance 10 --special-distance2 20 --special-color yellow --special-color2 red --output assets/github_$(date +"%Y").svg --use-localtime --min-distance 0.5
- name: Save data to parqent
if: env.SAVE_TO_PARQENT == 'true'
run: |
pip install duckdb
python run_page/save_to_parqent.py
- name: Push new runs
if: env.SAVE_DATA_IN_GITHUB_CACHE != 'true'
run: |
git config --local user.email "${{ env.GITHUB_EMAIL }}"
git config --local user.name "${{ env.GITHUB_NAME }}"
git config --local user.email "[email protected]"
git config --local user.name "GitHub Action"
git add .
git commit -m 'update new runs' || echo "nothing to commit"
git push || echo "nothing to push"
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ garmin-fit-sdk
haversine==2.8.0
garth
pycryptodome
duckdb
101 changes: 101 additions & 0 deletions run_page/save_to_parqent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import duckdb

# Statements are run in order: attach the SQLite database and switch to it,
# then dump the whole `activities` table into a Parquet file.
# Both paths are relative to the current working directory.
ATTACH_SQLITE_SQL = "ATTACH 'run_page/data.db' (TYPE SQLITE);USE data;"
EXPORT_PARQUET_SQL = (
    "COPY (SELECT * FROM activities) TO 'run_page/data.parquet' (FORMAT PARQUET);"
)

with duckdb.connect() as db:
    # The sqlite extension is installed and loaded before the ATTACH runs.
    db.install_extension("sqlite")
    db.load_extension("sqlite")
    for statement in (ATTACH_SQLITE_SQL, EXPORT_PARQUET_SQL):
        db.sql(statement)

"""
examples:
duckdb.sql("select regexp_extract(location_country, '[\u4e00-\u9fa5]{2,}(市|自治州|特别行政区)') as run_location, concat(try_cast(sum(distance/1000) as integer)::varchar,' km') as run_distance from read_parquet('https://github.com/yihong0618/run/raw/refs/heads/master/run_page/data.parquet') where run_location is not NULL group by run_location order by sum(distance) desc;").show(max_rows=50)
┌──────────────┬──────────────┐
│ run_location │ run_distance │
│ varchar │ varchar │
├──────────────┼──────────────┤
│ 大连市 │ 9328 km │
│ 沈阳市 │ 2030 km │
│ 北京市 │ 61 km │
│ 长沙市 │ 24 km │
│ 扬州市 │ 21 km │
│ 盘锦市 │ 21 km │
│ 烟台市 │ 21 km │
│ 上海市 │ 12 km │
│ 北九州市 │ 7 km │
│ 丹东市 │ 5 km │
│ 瓦房店市 │ 4 km │
│ 竹田市 │ 3 km │
│ 伊万里市 │ 2 km │
│ 长春市 │ 1 km │
│ 锦州市 │ 1 km │
│ │ 0 km │
├──────────────┴──────────────┤
│ 16 rows 2 columns │
└─────────────────────────────┘
duckdb.sql("select start_date_local, distance, name, location_country from read_parquet('https://github.com/yihong0618/run/raw/refs/heads/master/run_page/data.parquet') order by run_id desc limit 1;")
duckdb.sql("select start_date_local[:4] as year, sum(distance/1000)::integer from read_parquet('https://github.com/yihong0618/run/raw/refs/heads/master/run_page/data.parquet') group by year order by year desc;").show(max_rows=50)
┌─────────┬─────────────────────────────────────────┐
│ year │ CAST(sum((distance / 1000)) AS INTEGER) │
│ varchar │ int32 │
├─────────┼─────────────────────────────────────────┤
│ 2024 │ 1605 │
│ 2023 │ 696 │
│ 2022 │ 758 │
│ 2021 │ 1244 │
│ 2020 │ 1284 │
│ 2019 │ 1344 │
│ 2018 │ 405 │
│ 2017 │ 964 │
│ 2016 │ 901 │
│ 2015 │ 436 │
│ 2014 │ 823 │
│ 2013 │ 790 │
│ 2012 │ 387 │
├─────────┴─────────────────────────────────────────┤
│ 13 rows 2 columns │
└───────────────────────────────────────────────────┘
duckdb.sql("SELECT concat(try_cast(distance/1000 as integer)::varchar,' km') as distance_km,count(*) FROM read_parquet('https://github.com/yihong0618/run/raw/refs/heads/master/run_page/data.parquet') GROUP BY distance_km order by count(*) desc;").show(max_rows=50)
┌─────────────┬──────────────┐
│ distance_km │ count_star() │
│ varchar │ int64 │
├─────────────┼──────────────┤
│ 2 km │ 706 │
│ 3 km │ 639 │
│ 1 km │ 493 │
│ 5 km │ 391 │
│ 4 km │ 337 │
│ 6 km │ 164 │
│ 10 km │ 84 │
│ 8 km │ 55 │
│ 7 km │ 54 │
│ 0 km │ 29 │
│ 12 km │ 25 │
│ 11 km │ 17 │
│ 9 km │ 17 │
│ 15 km │ 15 │
│ 21 km │ 8 │
│ 16 km │ 7 │
│ 14 km │ 6 │
│ 20 km │ 6 │
│ 17 km │ 4 │
│ 18 km │ 3 │
│ 19 km │ 2 │
│ 13 km │ 2 │
│ 43 km │ 2 │
│ 24 km │ 1 │
│ 41 km │ 1 │
│ 28 km │ 1 │
├─────────────┴──────────────┤
│ 26 rows 2 columns │
└────────────────────────────┘
"""

0 comments on commit 058a222

Please sign in to comment.