Skip to content

Commit

Permalink
fix(blockworks): use next data api (DIYgod#18521)
Browse files: browse the repository at this point in the history
* fix(blockworks): use next data api

* fix: limit params
  • Loading branch information
TonyRL authored Mar 4, 2025
1 parent 011af84 commit 92cc3e6
Showing 1 changed file with 73 additions and 85 deletions.
158 changes: 73 additions & 85 deletions lib/routes/blockworks/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { parseDate } from '@/utils/parse-date';
import { load } from 'cheerio';
import logger from '@/utils/logger';
import parser from '@/utils/rss-parser';
import { config } from '@/config';

// Route metadata object for the Blockworks feed: path, display name, maintainers, handler and description.
// NOTE(review): this span is a diff rendering without +/- markers. The "Expand All" line below hides part of
// the object, and `name` / `description` each appear twice — presumably the old line followed by its replacement.
export const route: Route = {
path: '/',
Expand All @@ -25,44 +26,47 @@ export const route: Route = {
target: '/',
},
],
// Two `name` lines: 'Blockworks News' and 'News' (presumably before/after the commit — confirm against the repository).
name: 'Blockworks News',
name: 'News',
maintainers: ['pseudoyu'],
handler,
// Two `description` lines with the same text; only the quoting differs (template literal vs. plain string).
description: `Blockworks news with full text support.`,
description: 'Blockworks news with full text support.',
};

// Route handler: parses the Blockworks RSS feed, keeps the first N items and enriches each one with full text,
// categories and a cover image before returning the feed data.
// NOTE(review): this span interleaves two versions of the function (diff rendering without +/- markers), so it is
// not valid TypeScript as written. The variant taking `ctx` reads a `limit` query parameter and resolves articles
// through extractFullText(slug, buildId); the variant without `ctx` uses a fixed 20 and per-URL extractFullText /
// extractCoverUrl calls. Presumably the `ctx` variant is the post-commit code — confirm against the repository.
async function handler(): Promise<Data> {
async function handler(ctx): Promise<Data> {
const rssUrl = 'https://blockworks.co/feed';
const feed = await parser.parseURL(rssUrl);

// `limit` query parameter parsed as a base-10 integer; 20 when the parameter is absent or empty.
const limit = ctx.req.query('limit') ? Number.parseInt(ctx.req.query('limit'), 10) : 20;
// Keep only the leading feed items (20 in one variant, `limit` in the other)
const limitedItems = feed.items.slice(0, 20);

const items = await Promise.all(
limitedItems.map(async (item) => {
const articleUrl = item.link || '';
// Drop the query string so the cache key and the emitted link do not depend on it.
const cleanUrl = articleUrl.split('?')[0];
const limitedItems = feed.items.slice(0, limit);

// Get cached content or fetch new content
const fullText = await cache.tryGet(`blockworks:${cleanUrl}`, () => extractFullText(cleanUrl));
// Build id is resolved once, before the per-item work, and passed to every extractFullText call below.
const buildId = await getBuildId();

// Get cover image
const coverUrl = await cache.tryGet(`blockworks:cover:${cleanUrl}`, () => extractCoverUrl(cleanUrl));

return {
title: item.title || 'Untitled',
pubDate: item.isoDate ? parseDate(item.isoDate) : new Date(),
link: cleanUrl,
description: fullText || item.content || item.summary || '',
author: item.creator,
category: item.categories,
media: coverUrl
? {
content: { url: coverUrl },
}
: undefined,
} as DataItem;
})
const items = await Promise.all(
limitedItems
// First pass: strip the query string from each link (the link stays undefined when the item has none).
.map((item) => ({
...item,
link: item.link?.split('?')[0],
}))
// Second pass: one cache.tryGet per item, keyed by the cleaned link.
// NOTE(review): `item.link!` asserts the link exists; an item without a link would fail here — confirm the feed always supplies one.
.map((item) =>
cache.tryGet(item.link!, async () => {
// Callback given to cache.tryGet (presumably run only when nothing is cached for this link):
// the last path segment of the link is used as the article slug.
const content = await extractFullText(item.link!.split('/').pop()!, buildId);

return {
title: item.title || 'Untitled',
pubDate: item.isoDate ? parseDate(item.isoDate) : undefined,
link: item.link,
// Prefer the extracted article HTML; otherwise fall back to whatever text the feed item carries.
description: content.description || item.content || item.contentSnippet || item.summary || '',
author: item.author,
category: content.category,
media: content.imageUrl
? {
content: { url: content.imageUrl },
}
: undefined,
} as DataItem;
})
)
);

// NOTE(review): the fields of the returned object are hidden behind the "Expand All" line below.
return {
Expand All @@ -74,67 +78,51 @@ async function handler(): Promise<Data> {
};
}

// Fetches one article and extracts its content; on any error it logs and returns an empty result instead of throwing.
// NOTE(review): this span interleaves two versions of the function (diff rendering without +/- markers), so it is
// not valid TypeScript as written:
//   - (url) => string: downloads the article page and concatenates the text of <p>/<li> nodes under `section.w-full`.
//   - (slug, buildId) => { description, imageUrl, category }: reads the `_next/data/<buildId>/news/<slug>.json`
//     endpoint and cleans up `pageProps.article.content`.
// Presumably the (slug, buildId) variant is the post-commit code — confirm against the repository.
async function extractFullText(url: string): Promise<string> {
async function extractFullText(slug: string, buildId: string): Promise<{ description: string; imageUrl: string; category: string[] }> {
try {
const response = await ofetch(url);
const $ = load(response);

const articleSection = $('section.w-full');

if (articleSection.length) {
// Extract all paragraph and list items
const allElements = articleSection.find('p, li');
let text = '';

allElements.each((_, element) => {
const content = $(element).text().trim();
if (content) {
text += content + '\n\n';
}
});

// Remove promotional content at the end
const promoStartKeywords = ["Get the day's top crypto", 'Want alpha sent', "Can't wait?"];

// Everything from a promo keyword onwards is cut off.
for (const keyword of promoStartKeywords) {
const index = text.indexOf(keyword);
if (index !== -1) {
text = text.substring(0, index).trim();
}
// JSON endpoint for a single article, addressed by build id and slug.
const response = await ofetch(`https://blockworks.co/_next/data/${buildId}/news/${slug}.json?slug=${slug}`);
const article = response.pageProps.article;
// NOTE(review): the third argument `false` presumably makes cheerio parse the content as a fragment rather
// than a full document, so `$.html()` below returns just the article markup — confirm against cheerio docs.
const $ = load(article.content, null, false);

// Remove promotional content at the end
$('hr').remove();
// Drop any paragraph whose direct <em>/<strong> child contains one of the newsletter call-to-action phrases.
$('p > em, p > strong').each((_, el) => {
const $el = $(el);
if ($el.text().includes('To read full editions') || $el.text().includes('Get the news in your inbox')) {
$el.parent().remove();
}
});
// Drop any `ul.wp-block-list` that has an item linking to the daily newsletter page.
$('ul.wp-block-list > li > a').each((_, el) => {
const $el = $(el);
if ($el.attr('href') === 'https://blockworks.co/newsletter/daily') {
$el.parent().parent().remove();
}
});

return text || '';
}

return '';
return {
description: $.html(),
imageUrl: article.imageUrl,
// Categories and tags merged into one list, with duplicates removed via Set.
category: [...new Set([...article.categories, ...article.tags])],
};
} catch (error) {
logger.error('Error extracting full text from Blockworks:', error);
// Empty result on failure ('' in one variant, an empty object shape in the other).
return '';
return { description: '', imageUrl: '', category: [] };
}
}

/**
 * Download an article page and pull out the URL of its cover image.
 *
 * The image is looked up as `img[alt="article-image"]` inside `div.w-full.relative`.
 * A `src` that does not start with `http` is prefixed with `https://blockworks.co`,
 * and the final URL is passed through `decodeURIComponent`.
 *
 * @param url - Address of the article page to fetch.
 * @returns The decoded cover image URL, or an empty string when the container,
 *          the image or its `src` is missing, or when anything throws (the error is logged).
 */
async function extractCoverUrl(url: string): Promise<string> {
    try {
        const html = await ofetch(url);
        const $ = load(html);

        const container = $('div.w-full.relative');
        if (!container.length) {
            return '';
        }

        const image = container.find('img[alt="article-image"]');
        const src = image.length ? image.attr('src') : undefined;
        if (!src) {
            return '';
        }

        // Site-relative sources are resolved against the Blockworks origin.
        const absoluteUrl = src.startsWith('http') ? src : `https://blockworks.co${src}`;
        return decodeURIComponent(absoluteUrl);
    } catch (error) {
        logger.error('Error extracting cover URL from Blockworks:', error);
        return '';
    }
}
/**
 * Resolve the `buildId` embedded in the Blockworks homepage.
 *
 * The homepage is fetched, the text of `script#__NEXT_DATA__` is read, and the value of
 * its `"buildId"` entry is extracted with a regular expression; an empty string is
 * produced when no match is found. The lookup goes through `cache.tryGet` under the key
 * `blockworks:buildId`.
 * NOTE(review): `config.cache.routeExpire` and `false` are presumably the cache lifetime
 * and a "do not refresh on hit" flag — confirm against the cache helper's signature.
 *
 * @returns A promise for the build id string (possibly empty).
 */
const getBuildId = () => {
    const fetchBuildId = async () => {
        const homepage = await ofetch('https://blockworks.co');
        const $ = load(homepage);

        const nextData = $('script#__NEXT_DATA__').text();
        const matched = nextData?.match(/"buildId":"(.*?)",/);

        return matched?.[1] || '';
    };

    return cache.tryGet('blockworks:buildId', fetchBuildId, config.cache.routeExpire, false) as Promise<string>;
};

0 comments on commit 92cc3e6

Please sign in to comment.