Skip to content

Commit

Permalink
working proof of concept of explore feature
Browse files Browse the repository at this point in the history
  • Loading branch information
shiffman committed Mar 26, 2024
1 parent 38c6081 commit f517021
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 75 deletions.
5 changes: 2 additions & 3 deletions public/markov-chain/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@
</head>

<body>
<h1 id="title"></h1>
<p id="description"></p>
<script src="markov.js"></script>
<script src="markov-char.js"></script>
<!-- <script src="markov.js"></script> -->
<script src="sketch.js"></script>
</body>
</html>
81 changes: 81 additions & 0 deletions public/markov-chain/markov-char.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// A2Z F23
// Daniel Shiffman
// https://github.com/Programming-from-A-to-Z/A2Z-F23

// This is based on Allison Parrish's great RWET examples
// https://github.com/aparrish/rwet-examples

// A Markov Generator class
/**
 * Character-level Markov chain that can enumerate possible continuations
 * of a prompt.
 *
 * Text is fed in one line at a time; every run of `n` characters (an n-gram)
 * is mapped to the list of characters that followed it in the fed text.
 */
class MarkovGenerator {
  /**
   * @param {number} n - Order (length in characters) of each n-gram.
   * @param {number} max - Maximum number of characters appended to a prompt
   *   for a single generated result.
   * @param {number} [maxResults=10] - Maximum number of distinct results
   *   collected by one top-level generate() call.
   */
  constructor(n, max, maxResults = 10) {
    // Order (or length) of each ngram
    this.n = n;
    // What is the maximum amount we will generate?
    this.max = max;
    // How many distinct continuations generate() collects before stopping
    this.maxResults = maxResults;
    // An object as dictionary: each ngram is the key, the list of possible
    // next characters is the value. It has no prototype, so n-grams that
    // happen to spell inherited property names ("toString", "__proto__", ...)
    // are never mistaken for stored entries.
    this.ngrams = Object.create(null);
    // A separate array of possible beginnings to generated text
    this.beginnings = [];
  }

  /**
   * Feed one line of text into the Markov chain.
   *
   * @param {string} text - The line to learn from.
   * @returns {boolean} false if the line was shorter than `n` and was
   *   discarded, true if it was added.
   */
  feed(text) {
    // Discard this line if it's too short
    if (text.length < this.n) {
      return false;
    }

    // Store the first ngram of this line
    this.beginnings.push(text.substring(0, this.n));

    // Now let's go through everything and create the dictionary
    for (let i = 0; i < text.length - this.n; i++) {
      const gram = text.substring(i, i + this.n);
      const next = text.charAt(i + this.n);
      // Is this a new one?
      if (!(gram in this.ngrams)) {
        this.ngrams[gram] = [];
      }
      // Add to the list (duplicates are kept so frequencies stay available)
      this.ngrams[gram].push(next);
    }
    return true;
  }

  /**
   * Recursively enumerate distinct continuations of `prompt`, depth first.
   *
   * A branch ends when `max` characters have been appended or when the last
   * `n` characters of the text have no known continuation; the text built so
   * far is then recorded if it is not already present.
   *
   * @param {string} prompt - Text to continue.
   * @param {string[]} [results=[]] - Accumulator shared through the
   *   recursion; callers normally omit it.
   * @param {number} [currentLength=0] - Characters appended so far; callers
   *   normally omit it.
   * @returns {string[]} The accumulator, holding at most `maxResults` entries.
   */
  generate(prompt, results = [], currentLength = 0) {
    // If we've reached the max number of results, stop the recursion
    if (results.length >= this.maxResults) {
      return results;
    }

    const current = prompt.substring(prompt.length - this.n, prompt.length);
    const possibleNext =
      currentLength < this.max ? this.ngrams[current] : undefined;

    if (!possibleNext) {
      // Either the length limit was hit or there is nothing to continue
      // with: record the text, preventing duplicates.
      if (!results.includes(prompt)) {
        results.push(prompt);
      }
      return results;
    }

    // Visit each distinct continuation once. Repeated characters in the list
    // would only re-explore an identical subtree, which is exponentially
    // expensive and can never add a new result.
    for (const next of new Set(possibleNext)) {
      this.generate(prompt + next, results, currentLength + 1);
      // Early exit once the result cap is reached during recursion
      if (results.length >= this.maxResults) {
        return results;
      }
    }
    return results;
  }
}
36 changes: 11 additions & 25 deletions public/markov-chain/markov.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,19 @@ class MarkovGeneratorWord {

// A function to feed in text to the markov chain
feed(text) {
var tokens = text.tokenize();
const tokens = text.tokenize();

// Discard this line if it's too short
if (tokens.length < this.n) {
return false;
}

// Store the first ngram of this line
var beginning = tokens.slice(0, this.n).join(' ');
const beginning = tokens.slice(0, this.n).join(' ');
this.beginnings.push(beginning);

// Now let's go through everything and create the dictionary
for (var i = 0; i < tokens.length - this.n; i++) {
for (let i = 0; i < tokens.length - this.n; i++) {
// Using slice to pull out N elements from the array
let gram = tokens.slice(i, i + this.n).join(' ');
// What's the next element in the array?
Expand All @@ -52,36 +52,22 @@ class MarkovGeneratorWord {
// Add to the list
this.ngrams[gram].push(next);
}

//console.log(this.beginnings);
}

// Generate a text from the information ngrams
generate(prompt) {
// Get a random beginning
let current = prompt;
let results = [];

// The output is now an array of tokens that we'll join later
let output = current.tokenize();

// Generate a new token max number of times
for (let i = 0; i < this.max; i++) {
// If this is a valid ngram
if (this.ngrams[current]) {
// What are all the possible next tokens
let possible_next = this.ngrams[current];
// Pick one randomly
let next = random(possible_next);
// Add to the output
output.push(next);
// Get the last N entries of the output; we'll use this to look up
// an ngram in the next iteration of the loop
current = output.slice(output.length - this.n, output.length).join(' ');
} else {
break;
if (this.ngrams[current]) {
let possible_next = this.ngrams[current];
for (let i = 0; i < possible_next.length; i++) {
let output = current.tokenize();
output.push(possible_next[i]);
results.push(output.join(' '));
}
}
// Here's what we got!
return output.join(' ');
return results;
}
}
74 changes: 27 additions & 47 deletions public/markov-chain/sketch.js
Original file line number Diff line number Diff line change
@@ -1,73 +1,54 @@
// https://github.com/Programming-from-A-to-Z/A2Z-F23

//next question - how to display relevant search results?

let markov;
let input;
let data;
let output;

let minN = 3;
let maxN = 6;

// Loads 'thesis-193.json' via loadJSON and stores the result in the global `data`.
// NOTE(review): presumably invoked automatically by p5.js before setup() — confirm
// that the library is loaded by index.html.
function preload() {
data = loadJSON('thesis-193.json');
}

// let allData = [];

// async function loadData() {
// for (let venue of data.venues) {
// statusP.html(`Loading ${venue.name}`);
// let id = venue.id;
// let url = `https://itp.nyu.edu/projects/public/projectsJSON_ALL.php?venue_id=${id}`;
// let response = await fetch(url);
// let json = await response.json();
// allData.push(json);
// }
// statusP.hide();
// input = createInput();
// }

// Rebuilds the global `markov` generator with n-gram order `n`, then feeds it
// by calling process(data).
// NOTE(review): this view appears to contain both sides of a diff. `markov` is
// assigned twice in a row, so only the second assignment (MarkovGenerator with
// a max generated length of 20) is effective — confirm which line is live.
function buildMarkov(n) {
// The Markov Generator
// First argument is N-gram length, second argument is max length of generated text
markov = new MarkovGeneratorWord(n, 10);
markov = new MarkovGenerator(n, 20);
process(data);
// Disabled: feeding from multiple JSON sources collected in `allData`
// for (let json of allData) {
// process(json);
// }
}

// Builds the page UI: no canvas, one text input stored in the global `input`,
// and one paragraph stored in the global `output` that generate() writes into.
// NOTE(review): presumably the p5.js setup hook, run once after preload() — confirm.
function setup() {
noCanvas();
input = createInput();

// Disabled: asynchronous multi-venue loading
// loadData();
// Re-run goMarkov whenever the input element reports an input event
input.input(goMarkov);
output = createP();
}

//generate a markov after there is at least a word
//detect by sensing space?
//then split the words, use them as beginnings for markov generation

function keyPressed() {
//if space key is pressed, add the previous words into prompt arrays and send them to markov chain as beginnings
if (keyCode === 32) {
let value = input.value();
//split the input into tokens to do word count
let wordCount = value.tokenize();

//if word count is less than 4 (n-gram), use the word count as n-gram
if (wordCount.length < 4) {
buildMarkov(wordCount.length);
} else {
//if word count is more than 4, use 4 as n-gram
buildMarkov(4);
function goMarkov() {
let value = input.value();
if (value.length <= minN) {
let matches = [];
for (let i = 0; i < data.projects.length; i++) {
let elevator = data.projects[i].elevator_pitch;
let len = minN - value.length;
let regex = new RegExp(`${value}.{${len}}`, 'g');
let match = elevator.match(regex);
if (match) {
matches.push(match[0]);
}
}
generate(value);
value = random(matches);
buildMarkov(minN);
} else if (value.length > maxN) {
buildMarkov(maxN);
}
generate(value);
}

function process(data) {
let projects = data.projects;
for (let i = 0; i < projects.length; i++) {
//feed the markov generator with elevator pitches to get closer search to content than just titles
// feed the markov generator with elevator pitches to get closer search to content than just titles
// Not all the data has elevator pitches
if (projects[i].elevator_pitch) {
let elevator = decodeHtml(projects[i].elevator_pitch);
Expand All @@ -83,7 +64,6 @@ function decodeHtml(html) {
}

// Runs the global `markov` generator on the given text and renders the output.
// The parameter `input` shadows the global `input` element declared at the top
// of the sketch; inside this function it is the prompt passed by the caller.
// NOTE(review): this view appears to contain both sides of a diff — the
// '#title' lines and the `output` lines both call markov.generate(input).
// Confirm which pair is live; index.html in this commit shows no '#title'
// element after the change.
function generate(input) {
// Generate a title
let title = select('#title');
title.html(markov.generate(input));
// markov.generate is expected to return an array here (it is joined below),
// one entry per line of output
let results = markov.generate(input);
output.html(results.join('<br>'));
}

0 comments on commit f517021

Please sign in to comment.