Skip to content

Fetch News Digest PT #191021

Fetch News Digest PT

Fetch News Digest PT #191021

# Workflow identity shown in the Actions tab.
name: Fetch News Digest PT

# The only active trigger is an external `repository_dispatch` event.
# The manual and cron triggers below are kept for reference but disabled.
on:
  repository_dispatch:
  # workflow_dispatch:
  # schedule:
  #   - cron: '*/5 6-23 * * *'

jobs:
  # Single job: every step below runs sequentially on one hosted runner.
  build:
    runs-on: ubuntu-latest
    steps:
# One checkout is enough: a second checkout of the same repository only
# repeats the work. The action is pinned to a released major tag instead of
# the moving `main` branch so upstream changes cannot alter this workflow
# unexpectedly.
- name: Check out repo
  uses: actions/checkout@v4
  with:
    # 0 = fetch full history (needed so the later `git pull` / `git push`
    # operate on a complete branch rather than a shallow clone).
    fetch-depth: 0
#- name: Download Google Trends xml
# run: |-
# curl -o google-trends.xml "https://trends.google.com/trends/trendingsearches/daily/rss?geo=PT&$(date +%s)"
#- name: Build
# run: npm install
# --- Scrapers and RSS-to-JSON converters ------------------------------------
# Each step runs one Node script from the checked-out repository.
# `continue-on-error: true` on every step keeps one failing source from
# aborting the rest of the run.
# NOTE(review): the scripts presumably write the intermediate *.json files
# (eco.json, cm.json, expresso.json, ...) consumed by the later "Edit ..."
# steps; confirm against the scripts themselves.

# HTML scrapers
- name: Scrape ECO
  run: node ecoScraper.js
  continue-on-error: true
- name: Scrape CM
  run: node cmScraper.js
  continue-on-error: true

# RSS feeds converted to JSON
- name: RSS to JSON Expresso
  run: node rss-to-json--expresso.js
  continue-on-error: true
- name: RSS to JSON SIC Notícias
  run: node rss-to-json--sicn.js
  continue-on-error: true
- name: RSS to JSON CNN Portugal
  run: node rss-to-json--cnn.js
  continue-on-error: true
- name: RSS to JSON Notícias ao Minuto
  run: node rss-to-json--noticiasaominuto.js
  continue-on-error: true
- name: RSS to JSON RTP Notícias
  run: node rss-to-json--rtp.js
  continue-on-error: true
- name: RSS to JSON Jornal i
  run: node rss-to-json--i.js
  continue-on-error: true
- name: RSS to JSON Renascença
  run: node rss-to-json--renascenca.js
  continue-on-error: true
- name: RSS to JSON Novo
  run: node rss-to-json--novo.js
  continue-on-error: true
- name: RSS to JSON Económico
  run: node rss-to-json--economico.js
  continue-on-error: true
#- name: Download RR xml
# run: |-
# curl -o rr.xml https://rr.sapo.pt/rss/rssfeed.aspx?section=section_noticias
# continue-on-error: true
#- name: Download Jornal i xml
# run: |-
# curl -o jornali.xml https://ionline.sapo.pt/rss.xml
# continue-on-error: true
#- name: Download CM xml
# run: |-
# curl -o cm.xml https://www.cmjornal.pt/rss?$(date +%s)
#- name: Download CNN Portugal xml
# run: |-
# curl -o cnnportugal.xml https://cnnportugal.iol.pt/rss.xml
#- name: Delete GMT from rr.xml
# run: |-
# sed -i 's/GMT+1//g' rr.xml
# continue-on-error: true
#- name: Change +01:00 to +00:00 from jornali.xml
# run: |-
# sed -i 's/+01:00/+00:00/g' jornali.xml
# continue-on-error: true
#- name: Insert UTF8 line in cm.xml
# run: |-
# sed -i 's/<rss xmlns:atom="http:\/\/www.w3.org\/2005\/Atom" version="2.0">/<?xml version="1.0" encoding="ISO-8859-1"?><rss xmlns:atom="http:\/\/www.w3.org\/2005\/Atom" version="2.0">/g' cm.xml
# Rewrites every literal ".000Z" in eco.json to "+00:00", in place.
# The dot is escaped so that only a real "." matches; an unescaped "." is a
# regex wildcard and would also rewrite any other character followed by
# "000Z".
- name: Fix Timezone Offset in eco.json
  run: |-
    sed -i 's/\.000Z/+00:00/g' eco.json
  continue-on-error: true
# Fetches the latest Público items and stores the first 10 as compact JSON
# records of the form {title, url, date_published, media}.
#   - The cache-busting timestamp is appended with "&": a second "?" would
#     become part of the `size` value instead of a separate parameter.
#   - `set -o pipefail` + a temp file: the default `run` shell does not enable
#     pipefail, so a failed download or parse would otherwise exit 0 and leave
#     an empty output file. Now the previous file is kept on failure and only
#     replaced after the whole pipeline succeeds.
#   - `jq -c` emits the same compact one-line JSON as piping through
#     `jq -r tostring`.
- name: Download Público Feed
  run: |-
    set -o pipefail
    tmp="$(mktemp)"
    curl -fsS "https://www.publico.pt/api/list/ultimas?size=12&$(date +%s)" \
      | jq -c '.[:10] | map({title: .titulo, url, date_published: .data, media: "Público"})' \
      > "$tmp"
    mv "$tmp" digest/latest--publico.json
  continue-on-error: true
# Fetches the latest Observador items and stores the first 10 as compact JSON
# records of the form {title, url, date_published, media}.
#   - `url` is read directly from `.links.webUri`, so the record is already
#     flat and does not depend on text substitution of the serialized JSON.
#   - `set -o pipefail` + a temp file: the default `run` shell does not enable
#     pipefail, so a failed download or parse would otherwise exit 0 and leave
#     an empty output file. Now the previous file is kept on failure and only
#     replaced after the whole pipeline succeeds.
- name: Download Observador Feed
  run: |-
    set -o pipefail
    tmp="$(mktemp)"
    curl -fsS "https://api.observador.pt/wp/lists/latest/?$(date +%s)" \
      | jq -c '.[:10] | map({title, url: .links.webUri, date_published: .pubDate, media: "Observador"})' \
      > "$tmp"
    mv "$tmp" digest/latest--observador.json
  continue-on-error: true
#- name: Download Expresso Feed
# run: |-
# curl "https://www.toptal.com/developers/feed2json/convert?url=https%3A%2F%2Frss.impresa.pt%2Ffeed%2Flatest%2Fexpresso.rss%3F$(date +%s)%26type%3DARTICLE%2CVIDEO%2CGALLERY%2CSTREAM%2CPLAYLIST%2CEVENT%26limit%3D20%26pubsubhub%3Dtrue&minify=on" | jq '.items[:10] | map({title, url, date_published, media: "Expresso"})' | jq -r tostring > digest/latest--expresso.json
# continue-on-error: true
#- name: Download SIC Notícias Feed
# run: |-
# curl "https://www.toptal.com/developers/feed2json/convert?url=https%3A%2F%2Frss.impresa.pt%2Ffeed%2Flatest%2Fsicnot.rss%3F$(date +%s)%26type%3DARTICLE%2CVIDEO%2CGALLERY%2CSTREAM%2CPLAYLIST%2CEVENT%26limit%3D20%26pubsubhub%3Dtrue&minify=on" | jq '.items[:10] | map({title, url, date_published, media: "SIC Notícias"})' | jq -r tostring > digest/latest--sicnoticias.json
# continue-on-error: true
#- name: Download DN Feed
# run: |-
# curl "https://www.toptal.com/developers/feed2json/convert?url=http%3A%2F%2Ffeeds.dn.pt%2FDN-Ultimas?v=$(date +%s)&minify=on" | jq '.items[:10] | map({title, url, date_published, media: "DN"})' | jq -r tostring > digest/latest--dn.json
#- name: Download JN Feed
# run: |-
# curl "https://www.toptal.com/developers/feed2json/convert?url=http%3A%2F%2Ffeeds.dn.pt%2FJN-Ultimas?v=$(date +%s)&minify=on" | jq '.items[:10] | map({title, url, date_published, media: "JN"})' | jq -r tostring > digest/latest--jn.json
#- name: Download RTP Feed
# run: |-
# curl "https://www.toptal.com/developers/feed2json/convert?url=https%3A%2F%2Fwww.rtp.pt%2Fnoticias%2Frss?v=$(date +%s)&minify=on" | jq '.items[:10] | map({title, url, date_published, media: "RTP Notícias"})' | jq -r tostring > digest/latest--rtp.json
# continue-on-error: true
#- name: Download Económico Feed
# run: |-
# curl "https://www.toptal.com/developers/feed2json/convert?url=https%3A%2F%2Fjornaleconomico.sapo.pt%2Ffeed?v=$(date +%s)&minify=on" | jq '.items[:10] | map({title, url, date_published, media: "Económico"})' | jq -r tostring > digest/latest--economico.json
# continue-on-error: true
#- name: Download Dinheiro Vivo Feed
# run: |-
# curl "https://www.toptal.com/developers/feed2json/convert?url=http%3A%2F%2Ffeeds.dinheirovivo.pt%2FDV-Ultimas?v=$(date +%s)&minify=on" | jq '.items[:10] | map({title, url, date_published, media: "Dinheiro Vivo"})' | jq -r tostring > digest/latest--dinheirovivo.json
#- name: Download Correio da Manhã Feed
# run: |-
# curl "https://www.toptal.com/developers/feed2json/convert?url=https://raw.githubusercontent.com/alexmsantos/data/main/cm.xml" | jq '.items[:20] | map({title, url, date_published, media: "Correio da Manhã"})' | jq -r tostring > digest/latest--cm.json
#- name: Download Negócios xml
# run: |-
# curl -o negocios.xml https://www.jornaldenegocios.pt/rss?v=$(date +%s)
#- name: Download Negócios Feed 1
# run: |-
# curl "https://api.rss2json.com/v1/api.json?rss_url=https://www.jornaldenegocios.pt/rss?v=$(date +%s)" | jq -r tostring > negocios.json
#- name: Download Negócios Feed 2
# run: |-
# curl "https://www.toptal.com/developers/feed2json/convert?url=https://www.jornaldenegocios.pt/rss?v=$(date +%s)" | jq '.items[:10] | map({title, url, date_published, media: "Negócios"})' | jq -r tostring > digest/latest--negocios.json
#- name: Change Date format in digest/latest--negocios.json
# run: |-
# sed -e "s/^\(.*\)\([0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}\) \([0-9]\{2\}:[0-9]\{2\}:[0-9]\{2\}[.][0-9]\{1,3\}\)\(.*\)$/\1\2T\3Z\4/" digest/latest--negocios.json
#- name: Download Rádio Renascença Feed
# run: |-
# curl "https://www.toptal.com/developers/feed2json/convert?url=https://raw.githubusercontent.com/alexmsantos/data/main/rr.xml?v=$(date +%s)" | jq '.items[:10] | map({title, url, date_published, media: "Renascença"})' | jq -r tostring > digest/latest--rr.json
# continue-on-error: true
#- name: Download Notícias ao Minuto Feed
# run: |-
# curl "https://www.toptal.com/developers/feed2json/convert?url=https%3A%2F%2Fwww.noticiasaominuto.com%2Frss%2Fultima-hora?v=$(date +%s)&minify=on" | jq '.items[:10] | map({title, url, date_published, media: "Notícias ao Minuto"})' | jq -r tostring > digest/latest--noticiasaominuto.json
# continue-on-error: true
#- name: Download Novo Feed
# run: |-
# curl "https://www.toptal.com/developers/feed2json/convert?url=https://onovo.sapo.pt/feed/?v=$(date +%s)" | jq '.items[:10] | map({title, url, date_published, media: "Novo"})' | jq -r tostring > digest/latest--novo.json
# continue-on-error: true
#- name: Download TSF Feed
# run: |-
# curl "https://www.toptal.com/developers/feed2json/convert?url=http://feeds.tsf.pt/TSF-Ultimas?v=$(date +%s)" | jq '.items[:10] | map({title, url, date_published, media: "TSF"})' | jq -r tostring > digest/latest--tsf.json
# eco.json is read as a flat object keyed "0".."29": every three consecutive
# keys hold one article (title, url, date). The loop below regroups them into
# an array of 10 records tagged with media "Eco".
- name: Edit eco.json
  run: |-
    jq '[range(0; 10) as $i
         | {title: .[($i * 3) | tostring],
            url: .[($i * 3 + 1) | tostring],
            date_published: .[($i * 3 + 2) | tostring],
            media: "Eco"}]' eco.json >digest/latest--eco.json
  continue-on-error: true
# cm.json is read as a flat object keyed "0".."23": every three consecutive
# keys hold one article (title, url, date). The loop below regroups them into
# an array of 8 records tagged with media "Correio da Manhã".
- name: Edit cm.json
  run: |-
    jq '[range(0; 8) as $i
         | {title: .[($i * 3) | tostring],
            url: .[($i * 3 + 1) | tostring],
            date_published: .[($i * 3 + 2) | tostring],
            media: "Correio da Manhã"}]' cm.json >digest/latest--cm.json
  continue-on-error: true
# --- Normalise the converted RSS feeds ---------------------------------------
# Every step below applies the same projection to its input file:
#   * keep the first 10 entries of `.items`
#   * emit {title, url (from .link), date_published, media}
#   * `date_published` is `.published / 1000` formatted as an ISO-like string
#     with a fixed "+00:00" suffix (NOTE(review): the division suggests
#     `.published` is epoch milliseconds; confirm against the converter)
#   * an entry whose `.published` is missing/null/false is dropped, because
#     `empty` inside the object constructor yields no object at all
- name: Edit expresso.json
  run: |-
    jq '
      .items[:10]
      | map({
          title,
          url: .link,
          date_published: (
            if .published
            then (.published / 1000 | strftime("%Y-%m-%dT%H:%M:%S+00:00"))
            else empty
            end
          ),
          media: "Expresso"
        })
    ' expresso.json >digest/latest--expresso.json
  continue-on-error: true
- name: Edit sicnoticias.json
  run: |-
    jq '
      .items[:10]
      | map({
          title,
          url: .link,
          date_published: (
            if .published
            then (.published / 1000 | strftime("%Y-%m-%dT%H:%M:%S+00:00"))
            else empty
            end
          ),
          media: "SIC Notícias"
        })
    ' sicnoticias.json >digest/latest--sicnoticias.json
  continue-on-error: true
- name: Edit cnnportugal.json
  run: |-
    jq '
      .items[:10]
      | map({
          title,
          url: .link,
          date_published: (
            if .published
            then (.published / 1000 | strftime("%Y-%m-%dT%H:%M:%S+00:00"))
            else empty
            end
          ),
          media: "CNN Portugal"
        })
    ' cnnportugal.json >digest/latest--cnnportugal.json
  continue-on-error: true
- name: Edit jornali.json
  run: |-
    jq '
      .items[:10]
      | map({
          title,
          url: .link,
          date_published: (
            if .published
            then (.published / 1000 | strftime("%Y-%m-%dT%H:%M:%S+00:00"))
            else empty
            end
          ),
          media: "Jornal i"
        })
    ' jornali.json >digest/latest--jornali.json
  continue-on-error: true
- name: Edit noticiasaominuto.json
  run: |-
    jq '
      .items[:10]
      | map({
          title,
          url: .link,
          date_published: (
            if .published
            then (.published / 1000 | strftime("%Y-%m-%dT%H:%M:%S+00:00"))
            else empty
            end
          ),
          media: "Notícias ao Minuto"
        })
    ' noticiasaominuto.json >digest/latest--noticiasaominuto.json
  continue-on-error: true
- name: Edit rtp.json
  run: |-
    jq '
      .items[:10]
      | map({
          title,
          url: .link,
          date_published: (
            if .published
            then (.published / 1000 | strftime("%Y-%m-%dT%H:%M:%S+00:00"))
            else empty
            end
          ),
          media: "RTP Notícias"
        })
    ' rtp.json >digest/latest--rtp.json
  continue-on-error: true
# Keeps the first 10 entries of economico.json `.items` and emits
# {title, url (from .link), date_published, media: "Económico"}.
# `date_published` is `.published / 1000` formatted with a fixed "+00:00"
# suffix; entries without a truthy `.published` are dropped (`empty`).
# The step name now spells the file it actually reads (economico.json).
- name: Edit economico.json
  run: |-
    jq '
      .items[:10]
      | map({
          title,
          url: .link,
          date_published: (
            if .published
            then (.published / 1000 | strftime("%Y-%m-%dT%H:%M:%S+00:00"))
            else empty
            end
          ),
          media: "Económico"
        })
    ' economico.json >digest/latest--economico.json
  continue-on-error: true
# Keeps the first 10 entries of renascenca.json `.items` and emits
# {title, url (from .link), date_published, media: "Renascença"} into
# digest/latest--rr.json. Entries without a truthy `.published` are dropped.
# NOTE(review): unlike the other feeds, 3600 seconds are added before
# formatting while the suffix still says "+00:00". This looks like a fixed
# one-hour correction for this source; confirm it is still right year-round.
- name: Edit renascenca.json
  run: |-
    jq '
      .items[:10]
      | map({
          title,
          url: .link,
          date_published: (
            if .published
            then (.published / 1000 + 3600 | strftime("%Y-%m-%dT%H:%M:%S+00:00"))
            else empty
            end
          ),
          media: "Renascença"
        })
    ' renascenca.json >digest/latest--rr.json
  continue-on-error: true
# Keeps the first 10 entries of novo.json `.items` and emits
# {title, url (from .link), date_published, media: "Novo"}.
# `date_published` is `.published / 1000` formatted with a fixed "+00:00"
# suffix; entries without a truthy `.published` are dropped (`empty`).
- name: Edit novo.json
  run: |-
    jq '
      .items[:10]
      | map({
          title,
          url: .link,
          date_published: (
            if .published
            then (.published / 1000 | strftime("%Y-%m-%dT%H:%M:%S+00:00"))
            else empty
            end
          ),
          media: "Novo"
        })
    ' novo.json >digest/latest--novo.json
  continue-on-error: true
#- name: Download Jornal i Feed
# run: |-
# curl "https://www.toptal.com/developers/feed2json/convert?url=https://raw.githubusercontent.com/alexmsantos/data/main/jornali.xml?v=$(date +%s)" | jq '.items[:10] | map({title, url, date_published, media: "Jornal i"})' | jq -r tostring > digest/latest--jornali.json
# continue-on-error: true
#- name: Change Yes to No in google-trends.xml
# run: |-
# sed -i 's/yes/no/g' google-trends.xml
#- name: Change property name in google-trends.xml
# run: |-
# sed -i 's/ht:approx_traffic/approx_traffic/g' google-trends.xml
#- name: Download Google Trends Feed
# run: |-
# curl "https://www.toptal.com/developers/feed2json/convert?url=https://raw.githubusercontent.com/alexmsantos/data/main/google-trends.xml?v=$(date +%s)" | jq '.items[:10] | map({title, url, date_published, media: "Google Trends"})' | jq -r tostring > digest/google-trends.json
#- name: Download Eco Feed
# run: |-
# curl "https://eco.sapo.pt/wp-json/eco/v1/items" | jq '.[:10] | map(.date_published = .pubDate | del(.pubDate)) | map({title, links, date_published, media: "Eco"})' | jq -r tostring > digest/latest--eco.json
#- name: Download CNN Portugal Feed
# run: |-
# curl "https://api.rss2json.com/v1/api.json?rss_url=https%3A%2F%2Fcnnportugal.iol.pt%2Frss.xml?v=$(date +%s)" | jq '.items[:10] | map(.date_published = .pubDate | .url = .link | del(.pubDate, .link)) | map({title, url, date_published, media: "CNN Portugal"})' | jq '[.[] | .date_published |= (.[:10] + "T" + .[11:] + "Z")]' | jq -r tostring > digest/latest--cnnportugal.json
#- name: Parse date
# run: |-
# jq '.[].date_published |= strflocaltime' digest/latest--cnnportugal.json
# Two in-place text substitutions on the serialized Observador digest that
# turn a nested `"links":{"webUri":"..."}` object into a flat `"url":"..."`
# field:
#   1. replace the opening `"links":{"webUri":` with `"url":`
#   2. drop the now-unbalanced `}` that precedes `,"date_published":`
# If the file already carries a flat "url" key, neither pattern matches and
# the file is left unchanged.
- name: Sed links to url in observador.json
  run: |-
    sed -i 's/"links":{"webUri":/"url":/g' digest/latest--observador.json
  continue-on-error: true
- name: Sed links close tag in observador.json
  run: |-
    sed -i 's/},"date_published":/,"date_published":/g' digest/latest--observador.json
  continue-on-error: true
#- name: Find and Replace Links
# uses: jacobtomlinson/gha-find-replace@v3
# with:
# find: '"links":{"webUri":'
# replace: '"url":'
# include: "digest/latest--observador.json"
#- name: Find and Replace links close tag
# uses: jacobtomlinson/gha-find-replace@v3
# with:
# find: '},"date_published":'
# replace: ',"date_published":'
# include: "digest/latest--observador.json"
# Slurps every per-source digest into one array (one element per file) and
# writes it as compact single-line JSON to digest/digestpt.json.
#   - A single `jq -c -s .` replaces `jq -s . | jq -r tostring`. In the piped
#     form the default `run` shell (no pipefail) reports only the last
#     command's status, so a failed merge exited 0 and left an empty
#     digestpt.json to be committed.
#   - The result goes to a temp file first; digestpt.json is only replaced
#     after jq succeeds, and a failure now fails the step.
# NOTE(review): digest/latest--novo.json is produced by the "Edit novo.json"
# step but is not listed here. Confirm whether that omission is intentional.
- name: Merge files
  run: |-
    merged="$(mktemp)"
    jq -c -s . \
      digest/latest--publico.json \
      digest/latest--observador.json \
      digest/latest--expresso.json \
      digest/latest--rtp.json \
      digest/latest--economico.json \
      digest/latest--noticiasaominuto.json \
      digest/latest--sicnoticias.json \
      digest/latest--rr.json \
      digest/latest--jornali.json \
      digest/latest--eco.json \
      digest/latest--cm.json \
      digest/latest--cnnportugal.json \
      > "$merged"
    mv "$merged" digest/digestpt.json
# Commits whatever the previous steps changed and pushes it.
#   - Everything is staged first and the check is done on the index
#     (`git diff --cached`). A plain `git diff --quiet` ignores untracked
#     files, so newly created outputs would never be committed.
#   - The commit happens before syncing with the remote, and the sync is a
#     rebase: pulling into a dirty working tree aborts when the remote touched
#     any locally modified file.
- name: Commit and push changes
  run: |-
    git config user.name "Automated"
    git config user.email "---"
    git add -A
    if git diff --cached --quiet; then
      echo "No changes to commit"
    else
      git diff --cached --stat
      git commit -m "Updated News Digest PT"
    fi
    git pull --rebase
    git push