Skip to content

Commit

Permalink
fix: Knowledge cleaning and type migration (#4)
Browse the repository at this point in the history
* fix: Clean some code

* fix: Remove timestamp

* bump version
  • Loading branch information
RezaRahemtola authored Jul 27, 2024
1 parent 810b5d1 commit 2532836
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 29 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@libertai/libertai-js",
- "version": "0.0.8",
+ "version": "0.0.9",
"description": "In-browser SDK for interacting with LibertAI Decentralized AI Network",
"keywords": [],
"type": "module",
Expand Down
6 changes: 2 additions & 4 deletions src/inference.ts
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,6 @@ export class LlamaCppApiEngine {
// Allow caller to specify a target user, if different from Message[-1].role
targetUser: string | null = null
): string {
- let usedTokens = 0;
const maxTokens = model.maxTokens;
const promptFormat = model.promptFormat;

Expand All @@ -168,13 +167,12 @@ export class LlamaCppApiEngine {
systemPrompt += `${promptFormat.lineSeparator}`;

// Determine how many tokens we have left
- usedTokens = calculateTokenLength(systemPrompt);
+ let usedTokens = calculateTokenLength(systemPrompt);

// Iterate over messages in reverse order
// to generate the chat log
let chatLog = `${promptFormat.userPrepend}${persona.role.toLowerCase()}${promptFormat.userAppend}`;
- for (let i = messages.length - 1; i >= 0; i--) {
- const message = messages[i];
+ for (const message of messages.reverse()) {
let messageLog = '';
messageLog += `${promptFormat.userPrepend}${message.role.toLowerCase()}${promptFormat.userAppend}`;
messageLog += `${message.content}`;
Expand Down
31 changes: 10 additions & 21 deletions src/knowledge-store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,6 @@ export class KnowledgeStore {

// Initialize the localforage store
this.store = idb.createStore(this.config.storeName);

this.load = this.load.bind(this);
this.addDocument = this.addDocument.bind(this);
this.removeDocument = this.removeDocument.bind(this);
this.searchDocuments = this.searchDocuments.bind(this);
}

/**
Expand Down Expand Up @@ -69,7 +64,7 @@ export class KnowledgeStore {
): Promise<Document> {
// Create a new document object
const doc = createDocument(title, tags);
- // Split the document into chunks (which are just Lanhchain documents)
+ // Split the document into chunks (which are just LangChain documents)
const chunks = await chunkText(title, content);

// Embed each chunk and save the embeddings to localforage
Expand All @@ -85,11 +80,10 @@ export class KnowledgeStore {
}
} catch (e) {
console.error(
'libertai-js::KnowledgeStore::addDocument - Error embedding chunk: %s',
e
`libertai-js::KnowledgeStore::addDocument - Error embedding chunk: ${e}`
);
await this.prune();
throw Error('Error embedding batch: ' + e);
throw Error(`Error embedding batch: ${e}`);
}

// Embed the last batch
Expand All @@ -115,9 +109,8 @@ export class KnowledgeStore {
throw new Error(`Document not found: documentId = ${documentId}`);
}
// Remove all embeddings for the document
await this.store.iterate((obj, id, _iterationNumber) => {
await this.store.iterate((embedding: Embedding, id) => {
if (id === this.config.documentsKey) return;
const embedding = obj as Embedding;
if (embedding.documentId === documentId) {
this.store.removeItem(id);
}
Expand All @@ -135,9 +128,8 @@ export class KnowledgeStore {
*/
async prune(): Promise<number> {
let count = 0;
await this.store.iterate((obj, id, _iterationNumber) => {
await this.store.iterate((embedding: Embedding, id) => {
if (id === this.config.documentsKey) return;
const embedding = obj as Embedding;
if (!this.documents.has(embedding.documentId)) {
this.store.removeItem(id);
count += 1;
Expand All @@ -147,7 +139,7 @@ export class KnowledgeStore {
}

/**
* Search the documents in the store for the given query for similarity by euclidean distance
* Search the documents in the store for the given query for similarity by Euclidean distance
* @param query The query to search for
* @param k The number of results to return
* @param max_distance The maximum distance between the query and a result
Expand All @@ -161,26 +153,23 @@ export class KnowledgeStore {
tags: string[] = []
): Promise<SearchResult[]> {
const query_vector = await embed(query, this.config.embeddingApiUrl);
- let matches: SearchResult[] | null = null;
- matches = [];
+ const matches: SearchResult[] = [];
let n = 0;
// Iterate over all embeddings
await this.store.iterate((obj, id, _iterationNumber) => {
await this.store.iterate((embedding: Embedding, id) => {
if (n >= k) {
return;
}

// Skip the documents key
if (id === this.config.documentsKey) return;
// Check if this is a valid embedding
const embedding = obj as Embedding;

// If we have tags, make sure the embedding has one of them
const doc = this.documents.get(embedding.documentId);
if (!doc) {
console.warn(
"libertai-js::KnowledgeStore::searchDocuments - Couldn't find document for embedding: embdding_id = %s",
embedding.id
`libertai-js::KnowledgeStore::searchDocuments - Couldn't find document for embedding: embedding_id = ${embedding.id}`
);
return;
}
Expand All @@ -195,7 +184,7 @@ export class KnowledgeStore {
}
}

// Get the euclidean distance between the query and the embedding
// Get the Euclidean distance between the query and the embedding
const euclidean_distance = distance.euclidean(
query_vector,
embedding.vector
Expand Down
2 changes: 0 additions & 2 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@ export interface Message {
role: string;
// Message content
content: string;
// Date and time the message was sent
timestamp?: Date;
}

/* Inference types */
Expand Down
2 changes: 1 addition & 1 deletion src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ export async function chunkText(
separators: ['\n\n---\n\n', '\n\n', '\n', ' '],
});

- // Split into a list of langchain documents
+ // Split into a list of LangChain documents
const documents = await splitter.createDocuments(
[content],
// TODO: include metadata
Expand Down

0 comments on commit 2532836

Please sign in to comment.