Skip to content

Commit

Permalink
minor comments to help review main feature files
Browse files Browse the repository at this point in the history
- Comments added to pdf.rs, mod.rs, document_loaders.rs
  • Loading branch information
Tachikoma000 committed Sep 19, 2024
1 parent a852793 commit 9c8e751
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 6 deletions.
6 changes: 2 additions & 4 deletions rig-core/examples/document_loaders.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
// examples/document_loaders.rs

use rig::{
completion::Prompt,
document_loaders::PdfLoader,
Expand Down Expand Up @@ -50,8 +48,8 @@ async fn main() -> Result<(), anyhow::Error> {
embeddings.len()
);
for emb in &embeddings {
println!("Document ID: {}", emb.id);
println!("Document Content: {:?}", emb.document);
// println!("Document ID: {}", emb.id);
// println!("Document Content: {:?}", emb.document);
println!("Number of embeddings: {}", emb.embeddings.len());
println!(
"First embedding vector length: {}",
Expand Down
4 changes: 3 additions & 1 deletion rig-core/src/document_loaders/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// src/document_loaders/mod.rs
//! This module contains the implementation of document loaders for various file formats.
//! Currently, it includes loaders for CSV and PDF files.
mod csv;
// mod directory;
Expand All @@ -14,6 +15,7 @@ use std::error::Error as StdError;

/// Common interface for document loaders.
///
/// An implementor exposes a single asynchronous `load` method that produces
/// the loaded content as [`DocumentEmbeddings`] values.
#[async_trait]
pub trait DocumentLoader {
/// Asynchronously loads the document and returns a vector of document embeddings.
///
/// # Errors
///
/// Returns a boxed error (`Send + Sync`) when loading fails; the concrete
/// error type is chosen by the implementor.
async fn load(&self) -> Result<Vec<DocumentEmbeddings>, Box<dyn StdError + Send + Sync>>;
}

Expand Down
13 changes: 12 additions & 1 deletion rig-core/src/document_loaders/pdf.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
// Import necessary dependencies
use super::DocumentLoader;
use crate::embeddings::DocumentEmbeddings;
use async_trait::async_trait;
use lopdf::Document;
use serde_json::json;

/// Document loader for a single PDF file identified by its path.
pub struct PdfLoader {
/// Path of the PDF file, stored as an owned copy of the string given to
/// `PdfLoader::new`; it is passed to `lopdf::Document::load` and also used
/// as the `id` of the resulting `DocumentEmbeddings`.
path: String,
}

impl PdfLoader {
// Implement a constructor for the PdfLoader struct
pub fn new(path: &str) -> Self {
Self {
path: path.to_string(),
Expand All @@ -18,17 +21,25 @@ impl PdfLoader {

#[async_trait]
impl DocumentLoader for PdfLoader {
// Implement the load function for the DocumentLoader trait
async fn load(
&self,
) -> Result<Vec<DocumentEmbeddings>, Box<dyn std::error::Error + Send + Sync>> {
// Load the PDF document from the specified path
let doc = Document::load(&self.path)?;

// Extract text from each page of the PDF document
let mut text = String::new();
for page in doc.get_pages() {
if let Ok(content) = doc.extract_text(&[page.0]) {
text.push_str(&content);
}
}
println!("Extracted text from PDF: {}", text); // Debug print

// Print the extracted text for debugging purposes
println!("Extracted text from PDF: {}", text);

// Create a DocumentEmbeddings object with the extracted text
Ok(vec![DocumentEmbeddings {
id: self.path.clone(),
document: json!({"text": text}),
Expand Down

0 comments on commit 9c8e751

Please sign in to comment.