Skip to content

Commit

Permalink
Improve ETL Pipeline and add initial Use Cases
Browse files Browse the repository at this point in the history
Signed-off-by: Thomas Vitale <[email protected]>
  • Loading branch information
ThomasVitale committed Apr 29, 2024
1 parent 1e68816 commit 6375c3e
Show file tree
Hide file tree
Showing 98 changed files with 788 additions and 669 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Text Document Readers: Ollama
# Question Answering With Documents

Reading and vectorizing text documents with LLMs via Ollama.
Ask questions about documents with LLMs via Ollama.

## Running the application

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class ChatController {

/**
 * Handles POST requests to /chat/doc: passes the raw request body to the chat
 * service and returns the resulting answer text.
 *
 * @param input the request body, used as the user's question
 * @return the answer text produced by the chat service
 */
@PostMapping("/chat/doc")
String chatWithDocument(@RequestBody String input) {
// NOTE(review): the two return lines below are the removed/added sides of this
// diff hunk; the second one matches a service method that returns the text directly.
return chatService.chatWithDocument(input).getContent();
return chatService.chatWithDocument(input);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package com.thomasvitale.ai.spring;

import org.springframework.ai.chat.ChatClient;
import org.springframework.ai.chat.messages.UserMessage;
import org.springframework.ai.chat.prompt.SystemPromptTemplate;
import org.springframework.ai.document.Document;
import org.springframework.ai.vectorstore.SearchRequest;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.stereotype.Service;

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * Answers user questions by looking up related documents in the vector store
 * and handing them to the chat client as context in a system message.
 */
@Service
class ChatService {

    /** How many similar documents are requested from the vector store per question. */
    private static final int MAX_CONTEXT_DOCUMENTS = 5;

    /** System prompt text; the documents placeholder is filled in on every request. */
    private static final String SYSTEM_PROMPT_TEXT = """
            You are a helpful assistant, conversing with a user about the subjects contained in a set of documents.
            Use the information from the DOCUMENTS section to provide accurate answers. If unsure or if the answer
            isn't found in the DOCUMENTS section, simply state that you don't know the answer and do not mention
            the DOCUMENTS section.
            DOCUMENTS:
            {documents}
            """;

    private final ChatClient chatClient;
    private final VectorStore vectorStore;

    ChatService(ChatClient chatClient, VectorStore vectorStore) {
        this.chatClient = chatClient;
        this.vectorStore = vectorStore;
    }

    /**
     * Answers a question using the most similar stored documents as context.
     *
     * @param message the user's question; also used as the similarity-search query
     * @return the value returned by the chat client for the system and user messages
     */
    String chatWithDocument(String message) {
        // Retrieve the documents closest to the question.
        var searchRequest = SearchRequest.query(message).withTopK(MAX_CONTEXT_DOCUMENTS);
        List<Document> matches = vectorStore.similaritySearch(searchRequest);

        // Concatenate their contents, one document per line-separated chunk.
        String context = matches.stream()
                .map(Document::getContent)
                .collect(Collectors.joining(System.lineSeparator()));

        // A fresh template instance is created for each call, as before.
        var systemMessage = new SystemPromptTemplate(SYSTEM_PROMPT_TEXT)
                .createMessage(Map.of("documents", context));

        return chatClient.call(systemMessage, new UserMessage(message));
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import org.springframework.ai.document.Document;
import org.springframework.ai.reader.TextReader;
import org.springframework.ai.vectorstore.SimpleVectorStore;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.Resource;
import org.springframework.stereotype.Component;
Expand All @@ -15,38 +16,38 @@
import java.util.List;

@Component
public class DocumentInitializer {
public class DocumentEtlPipeline {

private static final Logger log = LoggerFactory.getLogger(DocumentInitializer.class);
private final SimpleVectorStore vectorStore;
private static final Logger logger = LoggerFactory.getLogger(DocumentEtlPipeline.class);
private final VectorStore vectorStore;

@Value("classpath:documents/story1.md")
Resource textFile1;

@Value("classpath:documents/story2.txt")
Resource textFile2;

/**
 * Stores the injected vector store for later use by {@code run()}.
 *
 * @param vectorStore the store that loaded documents are added to
 */
// NOTE(review): the two signature lines below are the removed/added sides of this
// diff hunk (class renamed, parameter widened from SimpleVectorStore to VectorStore).
public DocumentInitializer(SimpleVectorStore vectorStore) {
public DocumentEtlPipeline(VectorStore vectorStore) {
this.vectorStore = vectorStore;
}

/**
 * Loads the two injected text resources as documents, attaches a "location"
 * metadata entry to each reader, and adds all resulting documents to the
 * vector store. Annotated with {@code @PostConstruct}.
 */
@PostConstruct
public void run() {
// Accumulates the documents from both readers so they are added in one call.
List<Document> documents = new ArrayList<>();

// NOTE(review): each log.info / logger.info pair in this method is the
// removed/added side of the same diff line (logger field renamed).
log.info("Loading .md files as Documents");
logger.info("Loading .md files as Documents");
var textReader1 = new TextReader(textFile1);
textReader1.getCustomMetadata().put("location", "North Pole");
// NOTE(review): Charset.defaultCharset() is the JVM's default charset, so decoding
// depends on the runtime environment; consider an explicit charset.
textReader1.setCharset(Charset.defaultCharset());
documents.addAll(textReader1.get());

log.info("Loading .txt files as Documents");
logger.info("Loading .txt files as Documents");
var textReader2 = new TextReader(textFile2);
textReader2.getCustomMetadata().put("location", "Italy");
textReader2.setCharset(Charset.defaultCharset());
documents.addAll(textReader2.get());

log.info("Creating and storing Embeddings from Documents");
logger.info("Creating and storing Embeddings from Documents");
// Hands every loaded document to the vector store in a single call.
vectorStore.add(documents);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package com.thomasvitale.ai.spring;

import org.springframework.ai.embedding.EmbeddingClient;
import org.springframework.ai.vectorstore.SimpleVectorStore;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.Bean;

/**
 * Spring Boot entry point for the question-answering application. Also
 * declares the {@link VectorStore} bean used by the rest of the application.
 */
@SpringBootApplication
public class QuestionAnsweringWithDocuments {

    /**
     * Starts the Spring application.
     *
     * @param args command-line arguments forwarded to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(QuestionAnsweringWithDocuments.class, args);
    }

    /**
     * Provides a {@link SimpleVectorStore} built on the given embedding client,
     * exposed under the {@link VectorStore} interface.
     *
     * @param embeddingClient the client passed to the store's constructor
     * @return the vector store bean
     */
    @Bean
    VectorStore vectorStore(EmbeddingClient embeddingClient) {
        VectorStore store = new SimpleVectorStore(embeddingClient);
        return store;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package com.thomasvitale.ai.spring;

import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;

/**
 * Context-load smoke test for the application. The whole class is currently
 * switched off via {@code @Disabled}.
 */
@Disabled
@SpringBootTest
class QuestionAnsweringWithDocumentsTests {

    /** Intentionally empty: the test has no assertions of its own. */
    @Test
    void contextLoads() {
        // Nothing to verify here.
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package com.thomasvitale.ai.spring;

import org.springframework.boot.SpringApplication;
import org.springframework.boot.devtools.restart.RestartScope;
import org.springframework.boot.test.context.TestConfiguration;
import org.springframework.boot.testcontainers.service.connection.ServiceConnection;
import org.springframework.context.annotation.Bean;
import org.testcontainers.ollama.OllamaContainer;
import org.testcontainers.utility.DockerImageName;

/**
 * Test-time configuration that launches the application together with an
 * Ollama container registered as a service connection.
 */
@TestConfiguration(proxyBeanMethods = false)
public class TestQuestionAnsweringWithDocuments {

    /**
     * Runs the main application with this configuration class added.
     *
     * @param args command-line arguments forwarded to the application
     */
    public static void main(String[] args) {
        SpringApplication
                .from(QuestionAnsweringWithDocuments::main)
                .with(TestQuestionAnsweringWithDocuments.class)
                .run(args);
    }

    /**
     * Declares the Ollama container bean, using a custom image declared as a
     * compatible substitute for the stock {@code ollama/ollama} image.
     *
     * @return the configured container
     */
    @Bean
    @RestartScope
    @ServiceConnection
    OllamaContainer ollama() {
        DockerImageName image = DockerImageName
                .parse("ghcr.io/thomasvitale/ollama-llama3")
                .asCompatibleSubstituteFor("ollama/ollama");
        return new OllamaContainer(image);
    }

}

This file was deleted.

Loading

0 comments on commit 6375c3e

Please sign in to comment.