From a0bcaaf13fe8ce455b90ea7c7255ae2936f34e15 Mon Sep 17 00:00:00 2001 From: George <31376482+george-gca@users.noreply.github.com> Date: Tue, 13 Feb 2024 13:47:42 -0300 Subject: [PATCH] Added support for google scholar citations (#2193) Closes #1809, but there are caveats: 1 - it only works at build time, which means it won't update the numbers unless you build your site again 2 - Google might block the request if it receives too many of them, failing the whole process. This is what it looks like when it can fetch the information: ![Screenshot from 2024-02-13 00-37-52](https://github.com/alshedivat/al-folio/assets/31376482/646d1f3c-1294-491b-bc13-9013e38918b4) And this when it fails: ![image](https://github.com/alshedivat/al-folio/assets/31376482/516eefff-d394-44ad-8702-8982233f8c4f) Signed-off-by: George Araujo --- _bibliography/papers.bib | 1 + _config.yml | 3 +- _layouts/bib.liquid | 8 ++- _plugins/google-scholar-citations.rb | 78 ++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 _plugins/google-scholar-citations.rb diff --git a/_bibliography/papers.bib b/_bibliography/papers.bib index ce201ffac77e..a3ed37574e8a 100644 --- a/_bibliography/papers.bib +++ b/_bibliography/papers.bib @@ -44,6 +44,7 @@ @article{PhysRev.47.777 pdf={example_pdf.pdf}, altmetric={248277}, dimensions={true}, + google_scholar_id={qyhmnyLat1gC}, selected={true} } diff --git a/_config.yml b/_config.yml index 6c3f3cedd943..f91271f93931 100644 --- a/_config.yml +++ b/_config.yml @@ -73,7 +73,7 @@ x_username: # your X handle mastodon_username: # your mastodon instance+username in the format instance.tld/@username linkedin_username: # your LinkedIn user name telegram_username: # your Telegram user name -scholar_userid: # your Google Scholar ID +scholar_userid: qc6CJjYAAAAJ # your Google Scholar ID semanticscholar_id: # your Semantic Scholar ID whatsapp_number: # your WhatsApp number (full phone number in international format. 
Omit any zeroes, brackets, or dashes when adding the phone number in international format.) orcid_id: # your ORCID ID @@ -311,6 +311,7 @@ scholar: enable_publication_badges: altmetric: true # Altmetric badge (https://www.altmetric.com/products/altmetric-badges/) dimensions: true # Dimensions badge (https://badge.dimensions.ai/) + google_scholar: true # Google Scholar badge (https://scholar.google.com/intl/en/scholar/citations.html) # Filter out certain bibtex entry keywords used internally from the bib output filtered_bibtex_keywords: diff --git a/_layouts/bib.liquid b/_layouts/bib.liquid index 72df52f1ee63..e4d0d148f165 100644 --- a/_layouts/bib.liquid +++ b/_layouts/bib.liquid @@ -211,7 +211,8 @@ {% if site.enable_publication_badges %} {% assign entry_has_altmetric_badge = entry.altmetric or entry.doi or entry.eprint or entry.pmid or entry.isbn %} {% assign entry_has_dimensions_badge = entry.dimensions or entry.doi or entry.pmid %} - {% if entry_has_altmetric_badge or entry_has_dimensions_badge %} + {% assign entry_has_google_scholar_badge = entry.google_scholar_id %} + {% if entry_has_altmetric_badge or entry_has_dimensions_badge or entry_has_google_scholar_badge %}
{% if site.enable_publication_badges.altmetric and entry_has_altmetric_badge %} {% endif %} + {% if site.enable_publication_badges.google_scholar and entry_has_google_scholar_badge %} + + + + {% endif %}
# _plugins/google-scholar-citations.rb
require "active_support/all"
require 'nokogiri'
require 'open-uri'

# Exposes ActiveSupport's number helpers (number_to_human, ...) as
# module-level methods so they can be called as Helpers.number_to_human.
module Helpers
  extend ActiveSupport::NumberHelper
end

module Jekyll
  # Liquid tag that renders the "Cited by" count of a Google Scholar article.
  #
  # The tag takes two whitespace-separated arguments, in this order: the name
  # of the Liquid variable holding the Scholar user id and the name of the
  # Liquid variable holding the article id. Both are resolved against the
  # render context.
  #
  # The count is scraped from the article page's meta description at render
  # time. Results (including the "N/A" failure marker) are memoised in
  # Citations, keyed by article id, so every article is requested at most
  # once per Ruby process.
  class GoogleScholarCitationsTag < Liquid::Tag
    # Process-wide cache: article id => rendered citation string.
    Citations = { }

    # Matches e.g. "Cited by 42" or "Cited by 1,234" and captures the number.
    CITED_BY_PATTERN = /Cited by (\d+[,\d]*)/

    # Meta tags inspected for the "Cited by" text, in order of preference.
    META_SELECTORS = ['meta[name="description"]', 'meta[property="og:description"]'].freeze

    # Units used when abbreviating large counts.
    HUMAN_UNITS = { thousand: 'K', million: 'M', billion: 'B' }.freeze

    # @param tag_name [String] name the tag was invoked with
    # @param params [String] raw tag arguments: "<scholar_id_var> <article_id_var>"
    # @param tokens [Object] passed through to Liquid::Tag unchanged
    # @raise [ArgumentError] if fewer than two arguments are given
    def initialize(tag_name, params, tokens)
      super
      # String#split without arguments splits on runs of whitespace and
      # already drops leading/trailing blanks, so no extra strip is needed.
      @scholar_id, @article_id = params.split

      return if @scholar_id && @article_id

      raise ArgumentError, "#{tag_name} expects two arguments: <scholar_id> <article_id>"
    end

    # Renders the human-readable citation count for the article.
    #
    # @param context [Liquid::Context] render context used to resolve both ids
    # @return [String] abbreviated count (e.g. "1.2K"), or "N/A" if fetching failed
    def render(context)
      article_id = context[@article_id]
      scholar_id = context[@scholar_id]

      # If the citation count has already been fetched, return it
      cached = Citations[article_id]
      return cached if cached

      Citations[article_id] = fetch_citation_count(scholar_id, article_id)
    end

    private

    # Downloads the article page and formats its citation count.
    # Any StandardError (HTTP error, timeout, parse failure, ...) is logged to
    # stdout and turned into "N/A". Interrupt/SystemExit are deliberately NOT
    # rescued so the build can still be aborted while sleeping or fetching.
    def fetch_citation_count(scholar_id, article_id)
      article_url = "https://scholar.google.com/citations?view_op=view_citation&hl=en&user=#{scholar_id}&citation_for_view=#{scholar_id}:#{article_id}"

      # Sleep for a random amount of time to avoid being blocked
      sleep(rand(1.5..3.5))

      # Block form guarantees the underlying IO/tempfile is closed.
      doc = URI.open(article_url, "User-Agent" => "Ruby/#{RUBY_VERSION}") do |page|
        Nokogiri::HTML(page)
      end

      Helpers.number_to_human(extract_citation_count(doc), format: '%n%u', precision: 2, units: HUMAN_UNITS)
    rescue StandardError => e
      # Print the error message including the exception class and message
      puts "Error fetching citation count for #{article_id}: #{e.class} - #{e.message}"
      "N/A"
    end

    # Returns the integer citation count found in the first meta tag whose
    # content contains "Cited by <n>", or 0 when none does.
    def extract_citation_count(doc)
      META_SELECTORS.each do |selector|
        meta = doc.at_css(selector)
        content = meta && meta['content']
        next unless content

        matches = content.match(CITED_BY_PATTERN)
        # Drop thousands separators first: "1,234".to_i would yield 1.
        return matches[1].delete(',').to_i if matches
      end

      0
    end
  end
end

Liquid::Template.register_tag('google_scholar_citations', Jekyll::GoogleScholarCitationsTag)