Commit 9f83580

Merge pull request #1 from wyatt-avilla/yew

Yew merge

wyatt-avilla authored Jun 21, 2024
2 parents 7a35c80 + 3c996b8
Showing 48 changed files with 1,605 additions and 270 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -19,3 +19,7 @@ Cargo.lock

# env file containing API keys
.env

# wasm related
/frontend/dist/
/frontend/index.html
14 changes: 2 additions & 12 deletions Cargo.toml
@@ -1,16 +1,6 @@
workspace = { members = ["backend", "config", "frontend", "types"] }

[package]
name = "pulse"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
cached = {version = "0.51.4", features = ["async"]}
dotenv = "0.15.0"
regex = "1.10.5"
reqwest = "0.12.4"
scraper = "0.19.0"
serde_json = "1.0.117"
tokio = { version = "1.38.0", features = ["full"] }
url = "2.5.1"
21 changes: 21 additions & 0 deletions backend/Cargo.toml
@@ -0,0 +1,21 @@
[package]
name = "backend"
version = "0.1.0"
edition = "2021"
build = "build.rs"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
types = { path = "../types" }
config = { path = "../config" }
actix-files = "0.6.6"
actix-web = "4.8.0"
cached = { version = "0.51.4", features = ["async"] }
rand = "0.8.5"
regex = "1.10.5"
reqwest = "0.12.5"
scraper = "0.19.0"
serde_json = "1.0.117"
url = "2.5.2"
yew = "0.21.0"
9 changes: 9 additions & 0 deletions backend/build.rs
@@ -0,0 +1,9 @@
use std::fs;
use std::path::Path;

fn main() {
let dist_dir = Path::new("../frontend/dist");
// Emit a build-time warning if the Yew frontend hasn't been built into dist/ yet.
if !dist_dir.exists() || fs::read_dir(dist_dir).map_or(true, |entries| entries.count() == 0) {
println!("cargo:warning=Unable to serve frontend");
}
}
4 changes: 4 additions & 0 deletions backend/src/fetching.rs
@@ -0,0 +1,4 @@
pub mod github;
pub mod goodreads;
pub mod lastfm;
pub mod letterboxd;
53 changes: 53 additions & 0 deletions backend/src/fetching/github.rs
@@ -0,0 +1,53 @@
use cached::proc_macro::once;
use reqwest::{self, header};
use types::Commit;

// Cache results for 15 min (900 s); sync_writes keeps concurrent callers from refreshing at once
#[once(result = true, time = 900, sync_writes = true)]
pub async fn fetch_newest(
username: &str,
n: u32,
) -> Result<std::vec::Vec<Commit>, Box<dyn std::error::Error>> {
println!("Fetching data from github api...");
let url = format!("https://api.github.com/users/{username}/events");

let client = reqwest::Client::new();
let response = client
.get(&url)
.header(header::USER_AGENT, "feframe")
.send()
.await?
.text()
.await?;

let json: serde_json::Value = serde_json::from_str(&response)
.map_err(|err| Box::new(err) as Box<dyn std::error::Error>)?;

let json_array = match json.as_array() {
Some(json_array) => json_array.clone(),
None => return Ok(Vec::new()),
};

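// Keep only push events; other activity in the feed (stars, forks, PRs, ...) is ignored.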
let push_events: Vec<_> = json_array
.iter()
.filter(|&event| event["type"] == "PushEvent")
.cloned()
.collect();

Ok(push_events
.iter()
.filter_map(|event| {
let commit = &event["payload"]["commits"][0];
let repository_name = event["repo"]["name"].as_str()?.to_string();
let repository_link = format!("https://github.com/{repository_name}");

Some(Commit {
message: commit["message"].as_str()?.to_string(),
url: format!("{repository_link}/commit/{}", commit["sha"].as_str()?),
repository_name,
repository_link,
})
})
.take(n as usize)
.collect())
}
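
A minimal sketch (not part of this commit) of how the backend might expose this fetcher through actix-web, which is already declared as a dependency above. The route path, the username, and the assumption that Commit derives serde::Serialize are illustrative only:

use actix_web::{get, HttpResponse, Responder};

// Hypothetical handler: fetch_newest is cached by #[once], so repeated
// requests inside the 15-minute window never re-hit the GitHub API.
#[get("/api/commits")]
async fn recent_commits() -> impl Responder {
    match crate::fetching::github::fetch_newest("wyatt-avilla", 5).await {
        Ok(commits) => HttpResponse::Ok().json(commits),
        Err(_) => HttpResponse::InternalServerError().finish(),
    }
}

Such a handler would then be registered on the App with .service(recent_commits).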
src/dynamic_content/goodreads.rs → backend/src/fetching/goodreads.rs
@@ -1,27 +1,15 @@
use super::ApiRefresh;
use cached::proc_macro::once;
use regex::Regex;
use scraper::{Html, Selector};
use url::{ParseError, Url};

#[derive(Clone)]
pub struct Book {
pub title: String,
pub author: String,
pub title_url: Url,
pub author_url: Url,
}
use std::collections::HashMap;
use types::Book;

fn clean_text(input: &str) -> String {
let trimmed = input.trim().replace(['\n', '\r'], "");
let re = Regex::new(r"\s{2,}").unwrap();
re.replace_all(&trimmed, " ").to_string()
}

fn create_goodreads_url(path: &str) -> Result<Url, ParseError> {
Url::parse(&format!("https://www.goodreads.com/{path}"))
}

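// Goodreads lists authors as "Last, First"; swap to natural reading order.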
fn swap_name_order(full_name: &str) -> Result<String, String> {
let (last, first) = full_name
.split_once(',')
@@ -31,31 +19,36 @@ fn swap_name_order(full_name: &str) -> Result<String, String> {
Ok(format!("{first} {last}"))
}

impl ApiRefresh for Book {
type Content = Book;

async fn fetch_newest(n: u32) -> Result<std::vec::Vec<Book>, Box<dyn std::error::Error>> {
fetch_newest_books(n).await
}
}

// 1 day
#[once(result = true, time = 86400)]
async fn fetch_newest_books(n: u32) -> Result<std::vec::Vec<Book>, Box<dyn std::error::Error>> {
let shelf = std::env::var("GOODREADS_SHELF")?;
#[once(result = true, time = 86400, sync_writes = true)]
pub async fn fetch_newest(
shelf: &str,
n: u32,
) -> Result<std::vec::Vec<Book>, Box<dyn std::error::Error>> {
println!("Parsing goodreads shelf html...");
let html = Html::parse_document(
&reqwest::get(&shelf)
&reqwest::get(shelf)
.await
.map_err(|err| Box::new(err) as Box<dyn std::error::Error>)?
.text()
.await
.map_err(|err| Box::new(err) as Box<dyn std::error::Error>)?,
);

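// Map Goodreads' textual rating labels (taken from the span's title attribute) to star counts.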
let ratings = HashMap::from([
("did not like it", 1),
("it was ok", 2),
("liked it", 3),
("really liked it", 4),
("it was amazing", 5),
]);

let row_selector = Selector::parse(r"tr.bookalike.review").unwrap();

let title_selector = Selector::parse(r"td.field.title a").unwrap();
let author_selector = Selector::parse(r"td.field.author a").unwrap();
let rating_selector = Selector::parse(r"td.field.rating span").unwrap();
let cover_selector = Selector::parse(r"td.field.cover img").unwrap();

Ok(html
.select(&row_selector)
@@ -64,14 +57,20 @@ async fn fetch_newest_books(n: u32) -> Result<std::vec::Vec<Book>, Box<dyn std::
let title_href = title_element.value().attr("href")?;

let author_element = row.select(&author_selector).next()?;
let author_href = author_element.value().attr("href")?;
let author_href = row.select(&author_selector).next()?.value().attr("href")?;

let rating = row.select(&rating_selector).next()?.value().attr("title")?;

let cover_url = row.select(&cover_selector).next()?.value().attr("src")?;

Some(Book {
title: clean_text(&title_element.text().collect::<Vec<_>>().concat()),
author: swap_name_order(&author_element.text().collect::<Vec<_>>().concat())
.ok()?,
title_url: create_goodreads_url(title_href).ok()?,
author_url: create_goodreads_url(author_href).ok()?,
rating: "★".repeat(*ratings.get(rating)?),
title_url: format!("https://www.goodreads.com{title_href}"),
author_url: format!("https://www.goodreads.com{author_href}"),
cover_url: cover_url.to_string(),
})
})
.take(n as usize)
42 changes: 15 additions & 27 deletions src/dynamic_content/lastfm.rs → backend/src/fetching/lastfm.rs
@@ -1,30 +1,14 @@
use super::ApiRefresh;
use cached::proc_macro::once;
use url::Url;

#[derive(Clone)]
pub struct Song {
pub title: String,
pub artist_name: String,
pub album_name: String,
pub album_image: Url,
pub url: Url,
}

impl ApiRefresh for Song {
type Content = Song;

async fn fetch_newest(n: u32) -> Result<std::vec::Vec<Song>, Box<dyn std::error::Error>> {
fetch_newest_songs(n).await
}
}
use types::Song;

// 20 min
#[once(result = true, time = 1200)]
async fn fetch_newest_songs(n: u32) -> Result<std::vec::Vec<Song>, Box<dyn std::error::Error>> {
let key = std::env::var("LASTFM_KEY")?;
let username = std::env::var("LASTFM_USERNAME")?;

#[once(result = true, time = 1200, sync_writes = true)]
pub async fn fetch_newest(
username: &str,
key: &str,
n: u32,
) -> Result<std::vec::Vec<Song>, Box<dyn std::error::Error>> {
println!("Fetching data from lastfm api...");
let url = format!("https://ws.audioscrobbler.com/2.0/?method=user.getrecenttracks&user={username}&api_key={key}&format=json");

let response = reqwest::get(&url)
@@ -49,9 +33,13 @@ async fn fetch_newest_songs(n: u32) -> Result<std::vec::Vec<Song>, Box<dyn std::
title: track["name"].as_str()?.to_string(),
artist_name: track["artist"]["#text"].as_str()?.to_string(),
album_name: track["album"]["#text"].as_str()?.to_string(),
album_image: Url::parse(track["image"].as_array()?.get(1)?.get("#text")?.as_str()?)
.ok()?,
url: Url::parse(track["url"].as_str()?).ok()?,
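// Last.fm orders the image array smallest-first, so first() picks the small thumbnail.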
album_image: track["image"]
.as_array()?
.first()?
.get("#text")?
.as_str()?
.to_string(),
url: (track["url"]).as_str()?.to_string(),
})
})
.take(n as usize)
107 changes: 107 additions & 0 deletions backend/src/fetching/letterboxd.rs
@@ -0,0 +1,107 @@
use cached::proc_macro::once;
use scraper::{Html, Selector};
use types::Movie;

fn parse_image(html: &Html) -> Result<String, Box<dyn std::error::Error>> {
let img_selector =
Selector::parse("div.react-component.poster.film-poster img.image[src]").unwrap();

Ok(html
.select(&img_selector)
.next()
.ok_or("Image source not found in HTML")?
.attr("src")
.ok_or("Image source attribute not found in HTML")?
.to_string())
}

fn parse_release_year(html: &Html) -> Result<String, Box<dyn std::error::Error>> {
let poster_selector = Selector::parse("div.react-component.poster.film-poster").unwrap();

Ok(html
.select(&poster_selector)
.next()
.ok_or("Poster element not found in HTML")?
.attr("data-film-release-year")
.ok_or("Release year attribute not found in HTML")?
.to_string())
}

// 1 day
#[once(result = true, time = 86400, sync_writes = true)]
pub async fn fetch_newest(
username: &str,
n: u32,
) -> Result<std::vec::Vec<Movie>, Box<dyn std::error::Error>> {
println!("Parsing letterboxd profile html...");
let url = format!("https://letterboxd.com/{username}/films/by/rated-date/");
let html = Html::parse_document(
&reqwest::get(&url)
.await
.map_err(|err| Box::new(err) as Box<dyn std::error::Error>)?
.text()
.await
.map_err(|err| Box::new(err) as Box<dyn std::error::Error>)?,
);

let row_selector = Selector::parse("li.poster-container").unwrap();

let div_selector = Selector::parse("div.really-lazy-load").unwrap();
let rating_selector = Selector::parse("span.rating").unwrap();
let img_selector = Selector::parse("img.image").unwrap();

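// First pass: pull title, rating, and film slug from the profile page; the poster
// URL and release year require a second per-film request, made in the loop below.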
let movie_iter = html
.select(&row_selector)
.filter_map(|row| {
let title = row
.select(&img_selector)
.next()?
.value()
.attr("alt")?
.to_string();

let rating = row
.select(&rating_selector)
.next()
.map(|r| r.inner_html())
.filter(|r| !r.is_empty())?;

let div_val = row.select(&div_selector).next()?.value();

let link = div_val.attr("data-target-link")?;

let slug = div_val.attr("data-film-slug")?;

Some(Movie {
title,
rating,
release_year: String::new(),
url: format!("https://letterboxd.com{link}"),
poster_url: slug.to_string(), // temporarily hold the slug; swapped for the real poster URL below
})
})
.take(n as usize);

// filter_map can't take an async closure, so fetch each poster in a plain loop instead
let mut movies = Vec::new();
for mut movie in movie_iter {
let html = Html::parse_document(
&reqwest::get(format!(
"https://letterboxd.com/ajax/poster/film/{}/std/70x105/",
movie.poster_url // aka slug
))
.await
.map_err(|err| Box::new(err) as Box<dyn std::error::Error>)?
.text()
.await
.map_err(|err| Box::new(err) as Box<dyn std::error::Error>)?,
);

movie.poster_url = parse_image(&html)?;
movie.release_year = parse_release_year(&html)?;

movies.push(movie);
}

Ok(movies)
}
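
The new types crate's own diff is not shown above (the page truncated after this file), but from the field accesses in the fetchers its shared structs presumably look roughly like this. This is a sketch reconstructed from usage, not the actual file; the derives are assumptions (the Yew frontend would plausibly need Clone/PartialEq for props and serde traits to receive the data over HTTP):

// Hypothetical reconstruction of the shared structs in types/, based on
// field usage in the fetchers above. Derives are assumptions.
use serde::{Deserialize, Serialize};

#[derive(Clone, PartialEq, Serialize, Deserialize)]
pub struct Commit {
    pub message: String,
    pub url: String,
    pub repository_name: String,
    pub repository_link: String,
}

#[derive(Clone, PartialEq, Serialize, Deserialize)]
pub struct Book {
    pub title: String,
    pub author: String,
    pub rating: String,
    pub title_url: String,
    pub author_url: String,
    pub cover_url: String,
}

#[derive(Clone, PartialEq, Serialize, Deserialize)]
pub struct Song {
    pub title: String,
    pub artist_name: String,
    pub album_name: String,
    pub album_image: String,
    pub url: String,
}

#[derive(Clone, PartialEq, Serialize, Deserialize)]
pub struct Movie {
    pub title: String,
    pub rating: String,
    pub release_year: String,
    pub url: String,
    pub poster_url: String,
}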