# Fetch News Digest PT #193401
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow that fetches Portuguese news feeds and assembles them into a digest.
name: Fetch News Digest PT

# Only the repository_dispatch trigger is active. The manual and scheduled
# triggers are kept below, commented out.
on:
  repository_dispatch:
  #workflow_dispatch:
  #schedule:
  #  - cron: '*/5 6-23 * * *'

jobs:
  # Single job: every fetch, transform, merge and commit step runs here in order.
  build:
    runs-on: ubuntu-latest
    steps:
# Check the repository out once, with full history (fetch-depth: 0); the
# final step of this job pulls and pushes from this working tree.
# The original ran two checkouts back to back: one from the moving `main`
# ref of the action and one from the outdated v2 release. A single pinned
# major version is reproducible and avoids the redundant second clone.
- name: Check out repo
  uses: actions/checkout@v4
  with:
    fetch-depth: 0
#- name: Download Google Trends xml | |
# run: |- | |
# curl -o google-trends.xml "https://trends.google.com/trends/trendingsearches/daily/rss?geo=PT&$(date +%s)" | |
#- name: Build | |
# run: npm install | |
# Site scrapers. Each step may fail without aborting the job
# (continue-on-error), so one broken source does not block the others.
- name: Scrape ECO
  continue-on-error: true
  run: node ecoScraper.js

- name: Scrape CM
  continue-on-error: true
  run: node cmScraper.js
# RSS-to-JSON converters, one Node script per outlet. Every step is
# best-effort (continue-on-error) so a failing feed does not stop the job.
- name: RSS to JSON Expresso
  run: node rss-to-json--expresso.js
  continue-on-error: true

- name: RSS to JSON SIC Notícias
  run: node rss-to-json--sicn.js
  continue-on-error: true

- name: RSS to JSON CNN Portugal
  run: node rss-to-json--cnn.js
  continue-on-error: true

- name: RSS to JSON Notícias ao Minuto
  run: node rss-to-json--noticiasaominuto.js
  continue-on-error: true

# Step name corrected from "RTP Notícas".
- name: RSS to JSON RTP Notícias
  run: node rss-to-json--rtp.js
  continue-on-error: true

- name: RSS to JSON Jornal i
  run: node rss-to-json--i.js
  continue-on-error: true

- name: RSS to JSON Renascença
  run: node rss-to-json--renascenca.js
  continue-on-error: true

- name: RSS to JSON Novo
  run: node rss-to-json--novo.js
  continue-on-error: true

- name: RSS to JSON Económico
  run: node rss-to-json--economico.js
  continue-on-error: true
#- name: Download RR xml | |
# run: |- | |
# curl -o rr.xml https://rr.sapo.pt/rss/rssfeed.aspx?section=section_noticias | |
# continue-on-error: true | |
#- name: Download Jornal i xml | |
# run: |- | |
# curl -o jornali.xml https://ionline.sapo.pt/rss.xml | |
# continue-on-error: true | |
#- name: Download CM xml | |
# run: |- | |
# curl -o cm.xml https://www.cmjornal.pt/rss?$(date +%s) | |
#- name: Download CNN Portugal xml | |
# run: |- | |
# curl -o cnnportugal.xml https://cnnportugal.iol.pt/rss.xml | |
#- name: Delete GMT from rr.xml | |
# run: |- | |
# sed -i 's/GMT+1//g' rr.xml | |
# continue-on-error: true | |
#- name: Change +01:00 to +00:00 in jornali.xml
# run: |- | |
# sed -i 's/+01:00/+00:00/g' jornali.xml | |
# continue-on-error: true | |
#- name: Insert UTF8 line in cm.xml | |
# run: |- | |
# sed -i 's/<rss xmlns:atom="http:\/\/www.w3.org\/2005\/Atom" version="2.0">/<?xml version="1.0" encoding="ISO-8859-1"?><rss xmlns:atom="http:\/\/www.w3.org\/2005\/Atom" version="2.0">/g' cm.xml | |
# Rewrite the ".000Z" suffix in eco.json as an explicit "+00:00" offset.
# Best-effort: the job continues if eco.json is missing.
- name: Fix Timezone Offset in eco.json
  run: |-
    # The dot is escaped so only a literal ".000Z" is rewritten; an unescaped
    # "." is a regex wildcard and would also match e.g. "1000Z".
    sed -i 's/\.000Z/+00:00/g' eco.json
  continue-on-error: true
# Fetch the latest Público items, keep the first 10 and map the API's
# Portuguese field names (titulo, data) onto the digest schema
# (title, url, date_published, media). Output is compact single-line JSON.
- name: Download Público Feed
  run: |-
    # The cache-busting timestamp is appended with "&": the original used a
    # second "?", which made it part of the `size` value ("12?<epoch>").
    # A single `jq -c` replaces the former `jq ... | jq -r tostring` pair.
    curl "https://www.publico.pt/api/list/ultimas?size=12&_=$(date +%s)" \
      | jq -c '.[:10] | map({title: .titulo, url, date_published: .data, media: "Público"})' \
      > digest/latest--publico.json
  continue-on-error: true
# Fetch the latest Observador items, keep the first 10 and map them onto the
# digest schema (title, url, date_published, media) as compact JSON.
- name: Download Observador Feed
  run: |-
    # `url` is taken straight from .links.webUri here instead of emitting the
    # nested `links` object and patching the serialized text afterwards.
    # The later "Sed links ..." steps in this job then find nothing to
    # replace and leave the file unchanged.
    curl "https://api.observador.pt/wp/lists/latest/?$(date +%s)" \
      | jq -c '.[:10] | map({title, url: .links.webUri, date_published: .pubDate, media: "Observador"})' \
      > digest/latest--observador.json
  continue-on-error: true
#- name: Download Expresso Feed | |
# run: |- | |
# curl "https://www.toptal.com/developers/feed2json/convert?url=https%3A%2F%2Frss.impresa.pt%2Ffeed%2Flatest%2Fexpresso.rss%3F$(date +%s)%26type%3DARTICLE%2CVIDEO%2CGALLERY%2CSTREAM%2CPLAYLIST%2CEVENT%26limit%3D20%26pubsubhub%3Dtrue&minify=on" | jq '.items[:10] | map({title, url, date_published, media: "Expresso"})' | jq -r tostring > digest/latest--expresso.json | |
# continue-on-error: true | |
#- name: Download SIC Notícias Feed | |
# run: |- | |
# curl "https://www.toptal.com/developers/feed2json/convert?url=https%3A%2F%2Frss.impresa.pt%2Ffeed%2Flatest%2Fsicnot.rss%3F$(date +%s)%26type%3DARTICLE%2CVIDEO%2CGALLERY%2CSTREAM%2CPLAYLIST%2CEVENT%26limit%3D20%26pubsubhub%3Dtrue&minify=on" | jq '.items[:10] | map({title, url, date_published, media: "SIC Notícias"})' | jq -r tostring > digest/latest--sicnoticias.json | |
# continue-on-error: true | |
#- name: Download DN Feed | |
# run: |- | |
# curl "https://www.toptal.com/developers/feed2json/convert?url=http%3A%2F%2Ffeeds.dn.pt%2FDN-Ultimas?v=$(date +%s)&minify=on" | jq '.items[:10] | map({title, url, date_published, media: "DN"})' | jq -r tostring > digest/latest--dn.json | |
#- name: Download JN Feed | |
# run: |- | |
# curl "https://www.toptal.com/developers/feed2json/convert?url=http%3A%2F%2Ffeeds.dn.pt%2FJN-Ultimas?v=$(date +%s)&minify=on" | jq '.items[:10] | map({title, url, date_published, media: "JN"})' | jq -r tostring > digest/latest--jn.json | |
#- name: Download RTP Feed | |
# run: |- | |
# curl "https://www.toptal.com/developers/feed2json/convert?url=https%3A%2F%2Fwww.rtp.pt%2Fnoticias%2Frss?v=$(date +%s)&minify=on" | jq '.items[:10] | map({title, url, date_published, media: "RTP Notícias"})' | jq -r tostring > digest/latest--rtp.json | |
# continue-on-error: true | |
#- name: Download Económico Feed | |
# run: |- | |
# curl "https://www.toptal.com/developers/feed2json/convert?url=https%3A%2F%2Fjornaleconomico.sapo.pt%2Ffeed?v=$(date +%s)&minify=on" | jq '.items[:10] | map({title, url, date_published, media: "Económico"})' | jq -r tostring > digest/latest--economico.json | |
# continue-on-error: true | |
#- name: Download Dinheiro Vivo Feed | |
# run: |- | |
# curl "https://www.toptal.com/developers/feed2json/convert?url=http%3A%2F%2Ffeeds.dinheirovivo.pt%2FDV-Ultimas?v=$(date +%s)&minify=on" | jq '.items[:10] | map({title, url, date_published, media: "Dinheiro Vivo"})' | jq -r tostring > digest/latest--dinheirovivo.json | |
#- name: Download Correio da Manhã Feed | |
# run: |- | |
# curl "https://www.toptal.com/developers/feed2json/convert?url=https://raw.githubusercontent.com/alexmsantos/data/main/cm.xml" | jq '.items[:20] | map({title, url, date_published, media: "Correio da Manhã"})' | jq -r tostring > digest/latest--cm.json | |
#- name: Download Negócios xml | |
# run: |- | |
# curl -o negocios.xml https://www.jornaldenegocios.pt/rss?v=$(date +%s) | |
#- name: Download Negócios Feed 1 | |
# run: |- | |
# curl "https://api.rss2json.com/v1/api.json?rss_url=https://www.jornaldenegocios.pt/rss?v=$(date +%s)" | jq -r tostring > negocios.json | |
#- name: Download Negócios Feed 2 | |
# run: |- | |
# curl "https://www.toptal.com/developers/feed2json/convert?url=https://www.jornaldenegocios.pt/rss?v=$(date +%s)" | jq '.items[:10] | map({title, url, date_published, media: "Negócios"})' | jq -r tostring > digest/latest--negocios.json | |
#- name: Change Date format in digest/latest--negocios.json | |
# run: |- | |
# sed -e "s/^\(.*\)\([0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}\) \([0-9]\{2\}:[0-9]\{2\}:[0-9]\{2\}[.][0-9]\{1,3\}\)\(.*\)$/\1\2T\3Z\4/" digest/latest--negocios.json | |
#- name: Download Rádio Renascença Feed | |
# run: |- | |
# curl "https://www.toptal.com/developers/feed2json/convert?url=https://raw.githubusercontent.com/alexmsantos/data/main/rr.xml?v=$(date +%s)" | jq '.items[:10] | map({title, url, date_published, media: "Renascença"})' | jq -r tostring > digest/latest--rr.json | |
# continue-on-error: true | |
#- name: Download Notícias ao Minuto Feed | |
# run: |- | |
# curl "https://www.toptal.com/developers/feed2json/convert?url=https%3A%2F%2Fwww.noticiasaominuto.com%2Frss%2Fultima-hora?v=$(date +%s)&minify=on" | jq '.items[:10] | map({title, url, date_published, media: "Notícias ao Minuto"})' | jq -r tostring > digest/latest--noticiasaominuto.json | |
# continue-on-error: true | |
#- name: Download Novo Feed | |
# run: |- | |
# curl "https://www.toptal.com/developers/feed2json/convert?url=https://onovo.sapo.pt/feed/?v=$(date +%s)" | jq '.items[:10] | map({title, url, date_published, media: "Novo"})' | jq -r tostring > digest/latest--novo.json | |
# continue-on-error: true | |
#- name: Download TSF Feed | |
# run: |- | |
# curl "https://www.toptal.com/developers/feed2json/convert?url=http://feeds.tsf.pt/TSF-Ultimas?v=$(date +%s)" | jq '.items[:10] | map({title, url, date_published, media: "TSF"})' | jq -r tostring > digest/latest--tsf.json | |
# eco.json is read as a flat object keyed "0", "1", "2", ... where each
# consecutive triple is (title, url, date_published). Regroup the first
# 10 triples (keys "0".."29") into digest entries tagged media "Eco".
- name: Edit eco.json
  continue-on-error: true
  run: |-
    jq '
      . as $flat
      | [ range(10)
          | (. * 3) as $i
          | { title: $flat[$i | tostring],
              url: $flat[($i + 1) | tostring],
              date_published: $flat[($i + 2) | tostring],
              media: "Eco" } ]
    ' eco.json >digest/latest--eco.json
# cm.json is read as a flat object keyed "0", "1", "2", ... where each
# consecutive triple is (title, url, date_published). Regroup the first
# 8 triples (keys "0".."23") into digest entries tagged "Correio da Manhã".
- name: Edit cm.json
  continue-on-error: true
  run: |-
    jq '
      . as $flat
      | [ range(8)
          | (. * 3) as $i
          | { title: $flat[$i | tostring],
              url: $flat[($i + 1) | tostring],
              date_published: $flat[($i + 2) | tostring],
              media: "Correio da Manhã" } ]
    ' cm.json >digest/latest--cm.json
# Normalise each converted RSS file into the digest schema.
# Every step uses the same jq filter:
#   * keep the first 10 entries of .items;
#   * expose .link as `url`;
#   * divide .published by 1000 and format it with strftime, i.e. the value
#     is treated as epoch milliseconds; the offset is written as a literal
#     "+00:00";
#   * an item whose .published is null/false is dropped entirely (`empty`
#     inside the object construction yields no object).
# The former two-pass form (rename fields, delete the old ones, then pick)
# is collapsed into one object construction with the same output.
# All steps are best-effort (continue-on-error).
- name: Edit expresso.json
  run: |-
    jq '.items[:10] | map({title, url: .link, date_published: (if .published then (.published / 1000 | strftime("%Y-%m-%dT%H:%M:%S+00:00")) else empty end), media: "Expresso"})' expresso.json >digest/latest--expresso.json
  continue-on-error: true

- name: Edit sicnoticias.json
  run: |-
    jq '.items[:10] | map({title, url: .link, date_published: (if .published then (.published / 1000 | strftime("%Y-%m-%dT%H:%M:%S+00:00")) else empty end), media: "SIC Notícias"})' sicnoticias.json >digest/latest--sicnoticias.json
  continue-on-error: true

- name: Edit cnnportugal.json
  run: |-
    jq '.items[:10] | map({title, url: .link, date_published: (if .published then (.published / 1000 | strftime("%Y-%m-%dT%H:%M:%S+00:00")) else empty end), media: "CNN Portugal"})' cnnportugal.json >digest/latest--cnnportugal.json
  continue-on-error: true

- name: Edit jornali.json
  run: |-
    jq '.items[:10] | map({title, url: .link, date_published: (if .published then (.published / 1000 | strftime("%Y-%m-%dT%H:%M:%S+00:00")) else empty end), media: "Jornal i"})' jornali.json >digest/latest--jornali.json
  continue-on-error: true

- name: Edit noticiasaominuto.json
  run: |-
    jq '.items[:10] | map({title, url: .link, date_published: (if .published then (.published / 1000 | strftime("%Y-%m-%dT%H:%M:%S+00:00")) else empty end), media: "Notícias ao Minuto"})' noticiasaominuto.json >digest/latest--noticiasaominuto.json
  continue-on-error: true

- name: Edit rtp.json
  run: |-
    jq '.items[:10] | map({title, url: .link, date_published: (if .published then (.published / 1000 | strftime("%Y-%m-%dT%H:%M:%S+00:00")) else empty end), media: "RTP Notícias"})' rtp.json >digest/latest--rtp.json
  continue-on-error: true

# Step name corrected from "Edit ecnomico.json".
- name: Edit economico.json
  run: |-
    jq '.items[:10] | map({title, url: .link, date_published: (if .published then (.published / 1000 | strftime("%Y-%m-%dT%H:%M:%S+00:00")) else empty end), media: "Económico"})' economico.json >digest/latest--economico.json
  continue-on-error: true

# NOTE(review): this feed alone adds 3600 s before formatting while still
# labelling the result "+00:00" - presumably compensating for timestamps
# published one hour off; confirm it is still correct across DST changes.
# The output file is digest/latest--rr.json (not latest--renascenca.json).
- name: Edit renascenca.json
  run: |-
    jq '.items[:10] | map({title, url: .link, date_published: (if .published then (.published / 1000 + 3600 | strftime("%Y-%m-%dT%H:%M:%S+00:00")) else empty end), media: "Renascença"})' renascenca.json >digest/latest--rr.json
  continue-on-error: true

- name: Edit novo.json
  run: |-
    jq '.items[:10] | map({title, url: .link, date_published: (if .published then (.published / 1000 | strftime("%Y-%m-%dT%H:%M:%S+00:00")) else empty end), media: "Novo"})' novo.json >digest/latest--novo.json
  continue-on-error: true
#- name: Download Jornal i Feed | |
# run: |- | |
# curl "https://www.toptal.com/developers/feed2json/convert?url=https://raw.githubusercontent.com/alexmsantos/data/main/jornali.xml?v=$(date +%s)" | jq '.items[:10] | map({title, url, date_published, media: "Jornal i"})' | jq -r tostring > digest/latest--jornali.json | |
# continue-on-error: true | |
#- name: Change Yes to No in google-trends.xml | |
# run: |- | |
# sed -i 's/yes/no/g' google-trends.xml | |
#- name: Change property name in google-trends.xml | |
# run: |- | |
# sed -i 's/ht:approx_traffic/approx_traffic/g' google-trends.xml | |
#- name: Download Google Trends Feed | |
# run: |- | |
# curl "https://www.toptal.com/developers/feed2json/convert?url=https://raw.githubusercontent.com/alexmsantos/data/main/google-trends.xml?v=$(date +%s)" | jq '.items[:10] | map({title, url, date_published, media: "Google Trends"})' | jq -r tostring > digest/google-trends.json | |
#- name: Download Eco Feed | |
# run: |- | |
# curl "https://eco.sapo.pt/wp-json/eco/v1/items" | jq '.[:10] | map(.date_published = .pubDate | del(.pubDate)) | map({title, links, date_published, media: "Eco"})' | jq -r tostring > digest/latest--eco.json | |
#- name: Download CNN Portugal Feed | |
# run: |- | |
# curl "https://api.rss2json.com/v1/api.json?rss_url=https%3A%2F%2Fcnnportugal.iol.pt%2Frss.xml?v=$(date +%s)" | jq '.items[:10] | map(.date_published = .pubDate | .url = .link | del(.pubDate, .link)) | map({title, url, date_published, media: "CNN Portugal"})' | jq '[.[] | .date_published |= (.[:10] + "T" + .[11:] + "Z")]' | jq -r tostring > digest/latest--cnnportugal.json | |
#- name: Parse date | |
# run: |- | |
# jq '.[].date_published |= strflocaltime' digest/latest--cnnportugal.json | |
# Textual post-processing of digest/latest--observador.json: flatten the
# nested `"links":{"webUri":...}` object into a plain `"url":...` field.
# First step rewrites the opening of the object, second step removes the
# closing brace that precedes "date_published". Both are best-effort.
- name: Sed links to url in observador.json
  continue-on-error: true
  run: |-
    sed -i 's/"links":{"webUri":/"url":/g' digest/latest--observador.json

- name: Sed links close tab in observador.json
  continue-on-error: true
  run: |-
    sed -i 's/},"date_published":/,"date_published":/g' digest/latest--observador.json
#- name: Find and Replace Links | |
# uses: jacobtomlinson/gha-find-replace@v3 | |
# with: | |
# find: '"links":{"webUri":' | |
# replace: '"url":' | |
# include: "digest/latest--observador.json" | |
#- name: Find and Replace links close tag | |
# uses: jacobtomlinson/gha-find-replace@v3 | |
# with: | |
# find: '},"date_published":' | |
# replace: ',"date_published":' | |
# include: "digest/latest--observador.json" | |
# Slurp the per-outlet files into one compact JSON array of arrays and write
# it to digest/digestpt.json.
# Every producer step is best-effort, so any input may be missing, empty or
# unparsable. Previously one unparsable file made the slurp fail, and the
# output redirection had already truncated the digest to empty. Inputs are
# now validated first, and the digest is replaced only after a successful
# merge.
# NOTE(review): digest/latest--novo.json is produced earlier in this job but
# is not part of the merge list - confirm whether that omission is intended.
- name: Merge files
  run: |-
    feeds=()
    for feed in \
      digest/latest--publico.json \
      digest/latest--observador.json \
      digest/latest--expresso.json \
      digest/latest--rtp.json \
      digest/latest--economico.json \
      digest/latest--noticiasaominuto.json \
      digest/latest--sicnoticias.json \
      digest/latest--rr.json \
      digest/latest--jornali.json \
      digest/latest--eco.json \
      digest/latest--cm.json \
      digest/latest--cnnportugal.json
    do
      # `jq empty` only parses the file: non-zero exit means missing/invalid.
      if jq empty "$feed" 2>/dev/null; then
        feeds+=("$feed")
      else
        echo "::warning::Skipping missing or invalid feed file: $feed"
      fi
    done
    if [ "${#feeds[@]}" -eq 0 ]; then
      echo "::error::No valid feed files to merge"
      exit 1
    fi
    # Write to a temporary file so a failed merge never clobbers the digest.
    jq -c -s . "${feeds[@]}" > digest/digestpt.json.tmp
    mv digest/digestpt.json.tmp digest/digestpt.json
# Commit whatever the previous steps changed and push it back to the remote.
- name: Commit and push changes
  run: |-
    git config user.name "Automated"
    git config user.email "---"
    # Stage first: a plain `git diff --quiet` only looks at tracked files, so
    # newly created digest files would never have triggered a commit.
    git add -A
    if git diff --cached --quiet; then
      echo "No changes to commit"
      exit 0
    fi
    # Log a summary of what is about to be committed.
    git diff --cached --stat
    git commit -m "Updated News Digest PT"
    # Integrate remote changes after committing; pulling into a dirty working
    # tree (as before) is refused when incoming commits touch modified files.
    git pull --rebase
    git push