Commit 9f83580

Merge pull request #1 from wyatt-avilla/yew

Yew merge

wyatt-avilla authored Jun 21, 2024
2 parents 7a35c80 + 3c996b8
Showing 48 changed files with 1,605 additions and 270 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -19,3 +19,7 @@ Cargo.lock

# env file containing API keys
.env

# wasm related
/frontend/dist/
/frontend/index.html
14 changes: 2 additions & 12 deletions Cargo.toml
@@ -1,16 +1,6 @@
workspace = { members = ["backend", "config", "frontend", "types"] }

[package]
name = "pulse"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
cached = {version = "0.51.4", features = ["async"]}
dotenv = "0.15.0"
regex = "1.10.5"
reqwest = "0.12.4"
scraper = "0.19.0"
serde_json = "1.0.117"
tokio = { version = "1.38.0", features = ["full"] }
url = "2.5.1"
21 changes: 21 additions & 0 deletions backend/Cargo.toml
@@ -0,0 +1,21 @@
[package]
name = "backend"
version = "0.1.0"
edition = "2021"
build = "build.rs"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
types = { path = "../types" }
config = { path = "../config" }
actix-files = "0.6.6"
actix-web = "4.8.0"
cached = { version = "0.51.4", features = ["async"] }
rand = "0.8.5"
regex = "1.10.5"
reqwest = "0.12.5"
scraper = "0.19.0"
serde_json = "1.0.117"
url = "2.5.2"
yew = "0.21.0"
9 changes: 9 additions & 0 deletions backend/build.rs
@@ -0,0 +1,9 @@
use std::fs;
use std::path::Path;

fn main() {
let dist_dir = Path::new("../frontend/dist");
// Emit a build-time warning if the Yew frontend hasn't been built into dist/ yet.
if !dist_dir.exists() || fs::read_dir(dist_dir).map_or(true, |entries| entries.count() == 0) {
println!("cargo:warning=Unable to serve frontend");
}
}
4 changes: 4 additions & 0 deletions backend/src/fetching.rs
@@ -0,0 +1,4 @@
pub mod github;
pub mod goodreads;
pub mod lastfm;
pub mod letterboxd;
53 changes: 53 additions & 0 deletions backend/src/fetching/github.rs
@@ -0,0 +1,53 @@
use cached::proc_macro::once;
use reqwest::{self, header};
use types::Commit;

// Cache results for 15 min (900 s); sync_writes keeps concurrent callers from refreshing at once
#[once(result = true, time = 900, sync_writes = true)]
pub async fn fetch_newest(
username: &str,
n: u32,
) -> Result<std::vec::Vec<Commit>, Box<dyn std::error::Error>> {
println!("Fetching data from github api...");
let url = format!("https://api.github.com/users/{username}/events");

let client = reqwest::Client::new();
let response = client
.get(&url)
.header(header::USER_AGENT, "feframe")
.send()
.await?
.text()
.await?;

let json: serde_json::Value = serde_json::from_str(&response)
.map_err(|err| Box::new(err) as Box<dyn std::error::Error>)?;

let json_array = match json.as_array() {
Some(json_array) => json_array.clone(),
None => return Ok(Vec::new()),
};

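// Keep only push events; other activity in the feed (stars, forks, PRs, ...) is ignored.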
let push_events: Vec<_> = json_array
.iter()
.filter(|&event| event["type"] == "PushEvent")
.cloned()
.collect();

Ok(push_events
.iter()
.filter_map(|event| {
let commit = &event["payload"]["commits"][0];
let repository_name = event["repo"]["name"].as_str()?.to_string();
let repository_link = format!("https://github.com/{repository_name}");

Some(Commit {
message: commit["message"].as_str()?.to_string(),
url: format!("{repository_link}/commit/{}", commit["sha"].as_str()?),
repository_name,
repository_link,
})
})
.take(n as usize)
.collect())
}
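
A minimal sketch (not part of this commit) of how the backend might expose this fetcher through actix-web, which is already declared as a dependency above. The route path, the username, and the assumption that Commit derives serde::Serialize are illustrative only:

use actix_web::{get, HttpResponse, Responder};

// Hypothetical handler: fetch_newest is cached by #[once], so repeated
// requests inside the 15-minute window never re-hit the GitHub API.
#[get("/api/commits")]
async fn recent_commits() -> impl Responder {
    match crate::fetching::github::fetch_newest("wyatt-avilla", 5).await {
        Ok(commits) => HttpResponse::Ok().json(commits),
        Err(_) => HttpResponse::InternalServerError().finish(),
    }
}

Such a handler would then be registered on the App with .service(recent_commits).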
src/dynamic_content/goodreads.rs → backend/src/fetching/goodreads.rs
@@ -1,27 +1,15 @@
use super::ApiRefresh;
use cached::proc_macro::once;
use regex::Regex;
use scraper::{Html, Selector};
use url::{ParseError, Url};

#[derive(Clone)]
pub struct Book {
pub title: String,
pub author: String,
pub title_url: Url,
pub author_url: Url,
}
use std::collections::HashMap;
use types::Book;

fn clean_text(input: &str) -> String {
let trimmed = input.trim().replace(['\n', '\r'], "");
let re = Regex::new(r"\s{2,}").unwrap();
re.replace_all(&trimmed, " ").to_string()
}

fn create_goodreads_url(path: &str) -> Result<Url, ParseError> {
Url::parse(&format!("https://www.goodreads.com/{path}"))
}

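// Goodreads lists authors as "Last, First"; swap to natural reading order.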
fn swap_name_order(full_name: &str) -> Result<String, String> {
let (last, first) = full_name
.split_once(',')
@@ -31,31 +19,36 @@ fn swap_name_order(full_name: &str) -> Result<String, String> {
Ok(format!("{first} {last}"))
}

impl ApiRefresh for Book {
type Content = Book;

async fn fetch_newest(n: u32) -> Result<std::vec::Vec<Book>, Box<dyn std::error::Error>> {
fetch_newest_books(n).await
}
}

// 1 day
#[once(result = true, time = 86400)]
async fn fetch_newest_books(n: u32) -> Result<std::vec::Vec<Book>, Box<dyn std::error::Error>> {
let shelf = std::env::var("GOODREADS_SHELF")?;
#[once(result = true, time = 86400, sync_writes = true)]
pub async fn fetch_newest(
shelf: &str,
n: u32,
) -> Result<std::vec::Vec<Book>, Box<dyn std::error::Error>> {
println!("Parsing goodreads shelf html...");
let html = Html::parse_document(
&reqwest::get(&shelf)
&reqwest::get(shelf)
.await
.map_err(|err| Box::new(err) as Box<dyn std::error::Error>)?
.text()
.await
.map_err(|err| Box::new(err) as Box<dyn std::error::Error>)?,
);

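// Map Goodreads' textual rating labels (taken from the span's title attribute) to star counts.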
let ratings = HashMap::from([
("did not like it", 1),
("it was ok", 2),
("liked it", 3),
("really liked it", 4),
("it was amazing", 5),
]);

let row_selector = Selector::parse(r"tr.bookalike.review").unwrap();

let title_selector = Selector::parse(r"td.field.title a").unwrap();
let author_selector = Selector::parse(r"td.field.author a").unwrap();
let rating_selector = Selector::parse(r"td.field.rating span").unwrap();
let cover_selector = Selector::parse(r"td.field.cover img").unwrap();

Ok(html
.select(&row_selector)
@@ -64,14 +57,20 @@ async fn fetch_newest_books(n: u32) -> Result<std::vec::Vec<Book>, Box<dyn std::
let title_href = title_element.value().attr("href")?;

let author_element = row.select(&author_selector).next()?;
let author_href = author_element.value().attr("href")?;
let author_href = row.select(&author_selector).next()?.value().attr("href")?;

let rating = row.select(&rating_selector).next()?.value().attr("title")?;

let cover_url = row.select(&cover_selector).next()?.value().attr("src")?;

Some(Book {
title: clean_text(&title_element.text().collect::<Vec<_>>().concat()),
author: swap_name_order(&author_element.text().collect::<Vec<_>>().concat())
.ok()?,
title_url: create_goodreads_url(title_href).ok()?,
author_url: create_goodreads_url(author_href).ok()?,
rating: "★".repeat(*ratings.get(rating)?),
title_url: format!("https://www.goodreads.com{title_href}"),
author_url: format!("https://www.goodreads.com{author_href}"),
cover_url: cover_url.to_string(),
})
})
.take(n as usize)
42 changes: 15 additions & 27 deletions src/dynamic_content/lastfm.rs → backend/src/fetching/lastfm.rs
@@ -1,30 +1,14 @@
use super::ApiRefresh;
use cached::proc_macro::once;
use url::Url;

#[derive(Clone)]
pub struct Song {
pub title: String,
pub artist_name: String,
pub album_name: String,
pub album_image: Url,
pub url: Url,
}

impl ApiRefresh for Song {
type Content = Song;

async fn fetch_newest(n: u32) -> Result<std::vec::Vec<Song>, Box<dyn std::error::Error>> {
fetch_newest_songs(n).await
}
}
use types::Song;

// 20 min
#[once(result = true, time = 1200)]
async fn fetch_newest_songs(n: u32) -> Result<std::vec::Vec<Song>, Box<dyn std::error::Error>> {
let key = std::env::var("LASTFM_KEY")?;
let username = std::env::var("LASTFM_USERNAME")?;

#[once(result = true, time = 1200, sync_writes = true)]
pub async fn fetch_newest(
username: &str,
key: &str,
n: u32,
) -> Result<std::vec::Vec<Song>, Box<dyn std::error::Error>> {
println!("Fetching data from lastfm api...");
let url = format!("https://ws.audioscrobbler.com/2.0/?method=user.getrecenttracks&user={username}&api_key={key}&format=json");

let response = reqwest::get(&url)
@@ -49,9 +33,13 @@ async fn fetch_newest_songs(n: u32) -> Result<std::vec::Vec<Song>, Box<dyn std::
title: track["name"].as_str()?.to_string(),
artist_name: track["artist"]["#text"].as_str()?.to_string(),
album_name: track["album"]["#text"].as_str()?.to_string(),
album_image: Url::parse(track["image"].as_array()?.get(1)?.get("#text")?.as_str()?)
.ok()?,
url: Url::parse(track["url"].as_str()?).ok()?,
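// Last.fm orders the image array smallest-first, so first() picks the small thumbnail.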
album_image: track["image"]
.as_array()?
.first()?
.get("#text")?
.as_str()?
.to_string(),
url: (track["url"]).as_str()?.to_string(),
})
})
.take(n as usize)
107 changes: 107 additions & 0 deletions backend/src/fetching/letterboxd.rs
@@ -0,0 +1,107 @@
use cached::proc_macro::once;
use scraper::{Html, Selector};
use types::Movie;

fn parse_image(html: &Html) -> Result<String, Box<dyn std::error::Error>> {
let img_selector =
Selector::parse("div.react-component.poster.film-poster img.image[src]").unwrap();

Ok(html
.select(&img_selector)
.next()
.ok_or("Image source not found in HTML")?
.attr("src")
.ok_or("Image source attribute not found in HTML")?
.to_string())
}

fn parse_release_year(html: &Html) -> Result<String, Box<dyn std::error::Error>> {
let poster_selector = Selector::parse("div.react-component.poster.film-poster").unwrap();

Ok(html
.select(&poster_selector)
.next()
.ok_or("Poster element not found in HTML")?
.attr("data-film-release-year")
.ok_or("Release year attribute not found in HTML")?
.to_string())
}

// 1 day
#[once(result = true, time = 86400, sync_writes = true)]
pub async fn fetch_newest(
username: &str,
n: u32,
) -> Result<std::vec::Vec<Movie>, Box<dyn std::error::Error>> {
println!("Parsing letterboxd profile html...");
let url = format!("https://letterboxd.com/{username}/films/by/rated-date/");
let html = Html::parse_document(
&reqwest::get(&url)
.await
.map_err(|err| Box::new(err) as Box<dyn std::error::Error>)?
.text()
.await
.map_err(|err| Box::new(err) as Box<dyn std::error::Error>)?,
);

let row_selector = Selector::parse("li.poster-container").unwrap();

let div_selector = Selector::parse("div.really-lazy-load").unwrap();
let rating_selector = Selector::parse("span.rating").unwrap();
let img_selector = Selector::parse("img.image").unwrap();

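// First pass: pull title, rating, and film slug from the profile page; the poster
// URL and release year require a second per-film request, made in the loop below.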
let movie_iter = html
.select(&row_selector)
.filter_map(|row| {
let title = row
.select(&img_selector)
.next()?
.value()
.attr("alt")?
.to_string();

let rating = row
.select(&rating_selector)
.next()
.map(|r| r.inner_html())
.filter(|r| !r.is_empty())?;

let div_val = row.select(&div_selector).next()?.value();

let link = div_val.attr("data-target-link")?;

let slug = div_val.attr("data-film-slug")?;

Some(Movie {
title,
rating,
release_year: String::new(),
url: format!("https://letterboxd.com{link}"),
poster_url: slug.to_string(), // temporarily hold the slug; swapped for the real poster URL below
})
})
.take(n as usize);

// filter_map can't take an async closure, so fetch each poster in a plain loop instead
let mut movies = Vec::new();
for mut movie in movie_iter {
let html = Html::parse_document(
&reqwest::get(format!(
"https://letterboxd.com/ajax/poster/film/{}/std/70x105/",
movie.poster_url // aka slug
))
.await
.map_err(|err| Box::new(err) as Box<dyn std::error::Error>)?
.text()
.await
.map_err(|err| Box::new(err) as Box<dyn std::error::Error>)?,
);

movie.poster_url = parse_image(&html)?;
movie.release_year = parse_release_year(&html)?;

movies.push(movie);
}

Ok(movies)
}
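
The new types crate's own diff is not shown above (the page truncated after this file), but from the field accesses in the fetchers its shared structs presumably look roughly like this. This is a sketch reconstructed from usage, not the actual file; the derives are assumptions (the Yew frontend would plausibly need Clone/PartialEq for props and serde traits to receive the data over HTTP):

// Hypothetical reconstruction of the shared structs in types/, based on
// field usage in the fetchers above. Derives are assumptions.
use serde::{Deserialize, Serialize};

#[derive(Clone, PartialEq, Serialize, Deserialize)]
pub struct Commit {
    pub message: String,
    pub url: String,
    pub repository_name: String,
    pub repository_link: String,
}

#[derive(Clone, PartialEq, Serialize, Deserialize)]
pub struct Book {
    pub title: String,
    pub author: String,
    pub rating: String,
    pub title_url: String,
    pub author_url: String,
    pub cover_url: String,
}

#[derive(Clone, PartialEq, Serialize, Deserialize)]
pub struct Song {
    pub title: String,
    pub artist_name: String,
    pub album_name: String,
    pub album_image: String,
    pub url: String,
}

#[derive(Clone, PartialEq, Serialize, Deserialize)]
pub struct Movie {
    pub title: String,
    pub rating: String,
    pub release_year: String,
    pub url: String,
    pub poster_url: String,
}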