-
Notifications
You must be signed in to change notification settings - Fork 21
/
index.html
179 lines (160 loc) · 6.53 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>wllama.cpp demo</title>
<style>
body {
background-color: rgb(55, 55, 55);
color: rgb(222, 222, 222);
font-family: 'Courier New', Courier, monospace;
padding: 1em;
}
#output_cmpl {
border: 1px solid #aaa;
border-radius: 5px;
padding: 0.7em;
}
</style>
</head>
<body>
<h2>Completions</h2>
<button id="btn_start_cmpl">Load test model</button> Test model: <span id="cmpl_model"></span><br/>
<label for="btn_pick_file">Or, pick a local gguf file(s):</label>
<input type="file" id="btn_pick_file" name="btn_pick_file" accept=".gguf" multiple><br><br>
Prompt: <input id="input_prompt" value="Once upon a time," /><br/>
Number of tokens: <input id="input_n_predict" value="50" type="number" /><br/>
<button id="btn_run_cmpl">Run completions</button>
<br/>
<br/>
Completion: <br/>
<div id="output_cmpl"></div>
<br/>
--------------------<br/>
<br/>
<h2>Embeddings</h2>
<button id="btn_start_embd">Load model</button> Model: <span id="embd_model"></span><br/>
Text A: <input id="input_a" value="What is the weather like?" /><br/>
Text B: <input id="input_b" value="Will it rain tomorrow?" /><br/>
<button id="btn_run_embd">Calculate embeddings and cosine similarity</button><br/>
<br/>
Cosine similarity = <span id="output_embd"></span>
<script type="module">
import { Wllama } from '../../esm/index.js';
// Asset map handed to the Wllama constructor. Every asset is served from the
// ESM build directory, so each entry maps "<name>" to "../../esm/<name>".
const CONFIG_PATHS = Object.fromEntries(
  [
    'single-thread/wllama.js',
    'single-thread/wllama.wasm',
    'multi-thread/wllama.js',
    'multi-thread/wllama.wasm',
    'multi-thread/wllama.worker.mjs',
  ].map((asset) => [asset, `../../esm/${asset}`]),
);

// Demo models: each download URL is paired with the size label shown in the UI.
const [CMPL_MODEL, CMPL_MODEL_SIZE] = [
  'https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories15M-q4_0.gguf',
  '19MB',
];
const [EMBD_MODEL, EMBD_MODEL_SIZE] = [
  'https://huggingface.co/ggml-org/models/resolve/main/bert-bge-small/ggml-model-f16.gguf',
  '67MB',
];
/**
 * Puts the page into its initial state and wires up the model-loading controls.
 *
 * Both demo sections start disabled; each is enabled by its own start routine
 * once a model has been loaded. If loading fails, the error is logged and the
 * load controls are re-enabled so the user can try again.
 *
 * @returns {Promise<void>}
 */
async function main() {
  setCmplDisable(true);
  setEmbdDisable(true);

  // Label shown next to each button: the part of the URL after "/resolve/main",
  // or the whole URL when it does not contain that marker.
  const getName = (url) => {
    const matched = url.match(/\/resolve\/main(.*)/);
    return matched ? matched[1] : url;
  };
  elemCmplModel.textContent = `${getName(CMPL_MODEL)}, size: ${CMPL_MODEL_SIZE}`;
  elemEmbdModel.textContent = `${getName(EMBD_MODEL)}, size: ${EMBD_MODEL_SIZE}`;

  // Locks both completion-model sources while `load` runs. They stay locked
  // after a successful load and are unlocked again only when it fails.
  const loadCmplModel = async (load) => {
    elemBtnStartCmpl.disabled = true;
    elemBtnPickFile.disabled = true;
    try {
      await load();
    } catch (err) {
      console.error(err);
      elemBtnStartCmpl.disabled = false;
      elemBtnPickFile.disabled = false;
    }
  };

  elemBtnStartCmpl.onclick = async () => {
    await loadCmplModel(() => startCompletions(CMPL_MODEL));
  };

  elemBtnPickFile.onchange = async (event) => {
    const { files } = event.target;
    // Nothing was selected: leave the controls untouched.
    if (files.length === 0) return;
    await loadCmplModel(() => startCompletions(null, files));
  };

  elemBtnStartEmbd.onclick = async () => {
    elemBtnStartEmbd.disabled = true;
    try {
      await startEmbeddings(EMBD_MODEL);
    } catch (err) {
      console.error(err);
      elemBtnStartEmbd.disabled = false;
    }
  };
}
/////////////////////////////////////////////////////////////////////
// completions
/**
 * Loads a completion model, then enables the completion controls and wires up
 * the "Run completions" button.
 *
 * @param {?string} modelUrl - URL to load the model from; used only when `files` is not given.
 * @param {FileList|File[]} [files] - Local gguf file(s), e.g. from the file input; takes precedence over `modelUrl`.
 * @returns {Promise<void>} Settles once the model is loaded and the controls are enabled.
 */
async function startCompletions(modelUrl, files) {
  const wllama = new Wllama(CONFIG_PATHS);
  // await wllama.cacheManager.clear();
  if (files) {
    await wllama.loadModel(files);
  } else {
    await wllama.loadModelFromUrl(modelUrl);
  }
  setCmplDisable(false);

  elemBtnCompletions.onclick = async () => {
    // Validate before locking the UI so a bad value never starts a run.
    const nPredict = Number.parseInt(elemNPredict.value, 10);
    if (Number.isNaN(nPredict)) {
      elemOutputCmpl.textContent = 'Number of tokens must be an integer';
      return;
    }

    setCmplDisable(true);
    try {
      await wllama.createCompletion(elemInput.value, {
        nPredict,
        sampling: {
          temp: 0.5,
          top_k: 40,
          top_p: 0.9,
        },
        // Stream the text generated so far into the output box.
        onNewToken: (token, piece, currentText) => {
          elemOutputCmpl.textContent = currentText;
        },
      });
    } catch (err) {
      console.error(err);
    } finally {
      // Always hand the controls back, even when generation failed.
      setCmplDisable(false);
    }
  };
}
/////////////////////////////////////////////////////////////////////
// embeddings
/**
 * Loads an embedding model, then enables the embedding controls and wires up
 * the button that embeds both input texts and displays their dot product.
 *
 * @param {string} modelUrl - URL to load the embedding model from.
 * @returns {Promise<void>} Settles once the model is loaded and the controls are enabled.
 */
async function startEmbeddings(modelUrl) {
  const wllama1 = new Wllama(CONFIG_PATHS);
  await wllama1.loadModelFromUrl(modelUrl, {
    // IMPORTANT: do not forget to set embeddings to true. If not set, "createEmbedding" will crash the app
    embeddings: true,
    pooling_type: 'LLAMA_POOLING_TYPE_MEAN', // depends on the model, you may need to change this
  });
  // optionally, you can switch between embeddings and generation mode whenever you want:
  // await wllama1.setOptions({ embeddings: false });
  setEmbdDisable(false);

  elemBtnEmbeddings.onclick = async () => {
    setEmbdDisable(true);
    try {
      // The two texts are embedded one after the other on the same instance.
      const embdA = await wllama1.createEmbedding(elemInputA.value);
      console.log({ embdA });
      const embdB = await wllama1.createEmbedding(elemInputB.value);
      console.log({ embdB });
      // since embeddings are normalized, we don't need to calculate norm:
      // the plain dot product is already the cosine similarity
      const dotProd = embdA.reduce((acc, a, i) => acc + a * embdB[i], 0);
      elemOutputEmbd.textContent = dotProd;
    } catch (err) {
      console.error(err);
    } finally {
      // Always hand the controls back, even when embedding failed.
      setEmbdDisable(false);
    }
  };
}
/////////////////////////////////////////////////////////////////////
// DOM elements
// Shorthand for looking up an element of this page by its id attribute.
const byId = (id) => document.getElementById(id);

// Completions section
const elemCmplModel = byId('cmpl_model');
const elemBtnStartCmpl = byId('btn_start_cmpl');
const elemBtnPickFile = byId('btn_pick_file');
const elemInput = byId('input_prompt');
const elemNPredict = byId('input_n_predict');
const elemBtnCompletions = byId('btn_run_cmpl');
const elemOutputCmpl = byId('output_cmpl');

// Embeddings section
const elemEmbdModel = byId('embd_model');
const elemBtnStartEmbd = byId('btn_start_embd');
const elemInputA = byId('input_a');
const elemInputB = byId('input_b');
const elemBtnEmbeddings = byId('btn_run_embd');
const elemOutputEmbd = byId('output_embd');
// utils
/**
 * Enables or disables all completion controls (prompt, token count, run button) at once.
 * @param {boolean} disabled - Value assigned to each control's `disabled` property.
 */
const setCmplDisable = (disabled) => {
  for (const control of [elemInput, elemNPredict, elemBtnCompletions]) {
    control.disabled = disabled;
  }
};
/**
 * Enables or disables all embedding controls (both text inputs and the run button) at once.
 * @param {boolean} disabled - Value assigned to each control's `disabled` property.
 */
const setEmbdDisable = (disabled) => {
  [elemInputA, elemInputB, elemBtnEmbeddings].forEach((control) => {
    control.disabled = disabled;
  });
};
// main() is async, so a failure while wiring up the page would otherwise be a
// floating rejection; log it explicitly instead.
main().catch((err) => {
  console.error(err);
});
</script>
</body>
</html>