Skip to content

Commit

Permalink
rough implementation of Python tool
Browse files Browse the repository at this point in the history
  • Loading branch information
breandan committed Jan 28, 2025
1 parent a8a8a1a commit 2838676
Show file tree
Hide file tree
Showing 3 changed files with 1,842 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,12 @@ class Bindex<T>(
val ntIndices: Map<T, Int> = indexedNTs.zip(indexedNTs.indices).toMap()
): List<T> by indexedNTs {
/**
 * Builds a [Bindex] from an index-to-element [map].
 *
 * The primary constructor receives, in order: the set of the map's values, the list of the map's values
 * (in the map's iteration order), and the inverted element-to-index mapping. When several keys share an
 * equal value, the entry visited last determines that value's index in the inverted mapping.
 */
constructor(map: Map<Int, T>) : this(
  map.values.toSet(),
  map.values.toList(),
  map.entries.associate { (index, element) -> element to index }
)
/**
 * Returns the index stored for [s] in [ntIndices].
 *
 * When [s] has no entry, a diagnostic line is printed to standard output and a [NullPointerException]
 * is thrown. The exception type is the one the former `null!!` produced, so existing callers observe the
 * same failure type; it now also carries the diagnostic message.
 *
 * @throws NullPointerException if [s] is not a key of [ntIndices].
 */
operator fun get(s: T): Int = ntIndices[s] ?: run {
  val message = "Unknown nonterminal: $s"
  println(message)
  // Explicit throw replaces the `1.also { ...; null!! }` trick, which hid the intent behind a dummy receiver.
  throw NullPointerException(message)
}
/**
 * Returns the index stored for [s] in [ntIndices].
 *
 * When [s] has no entry, a diagnostic line is printed to standard output, a stack trace locating the
 * failing lookup is printed, and a [NullPointerException] is thrown. The exception type is the one the
 * former `null!!` produced, so existing callers observe the same failure type; it now also carries the
 * diagnostic message.
 *
 * @throws NullPointerException if [s] is not a key of [ntIndices].
 */
operator fun get(s: T): Int = ntIndices[s] ?: run {
  val message = "Unknown nonterminal: $s"
  println(message)
  // This exception is created only so its stack trace can be printed; it is never thrown,
  // which makes the previous throw-and-immediately-catch round trip unnecessary.
  IllegalArgumentException(message).printStackTrace()
  // Explicit throw replaces the `1.also { ...; null!! }` trick, which hid the intent behind a dummy receiver.
  throw NullPointerException(message)
}
/**
 * Non-throwing lookup: returns the index stored for [s] in [ntIndices], or `null` when [s] is not a key.
 * Nothing is printed on a miss.
 */
fun getUnsafe(s: T): Int? {
  return ntIndices[s]
}
/**
 * Renders the index as text: a `Bindex:` header line, then one `position: element` line per entry of
 * [indexedNTs] in list order, terminated by a trailing newline.
 */
override fun toString(): String =
  indexedNTs.withIndex().joinToString(separator = "\n", prefix = "Bindex:\n", postfix = "\n") { (position, element) ->
    "$position: $element"
  }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ data class Segmentation(
acc
}

// Takes an IntRange of word indices and a String of words delimited by one or more whitespaces,
// Takes an IntRange of word indices and a String of words delimited by one or more whitespaces,
// and returns the corresponding IntRange of character indices in the original string.
// For example, if the input is (1..2, "a__bb___ca d e f"), the output is 3..10
fun IntRange.charIndicesOfWordsInString(str: String): IntRange {
Expand Down Expand Up @@ -125,6 +125,16 @@ fun preparseParseableLines(cfg: CFG, editorText: Σᐩ) {
}
}

/**
 * Fills [segmentationCacheHTML] for every line of [editorText] that is neither blank nor contains a hole.
 *
 * Each such line is cached under the key `cfg.hashCode() + line.hashCode()`. If the key is already present
 * the line is left alone and [recognizer] is not invoked for it. Otherwise the cached value is the line
 * itself when [recognizer] accepts it, or the line wrapped in `<u>…</u>` when it does not; the stored value
 * is also printed, prefixed with `Recognized` or `Unrecognized` respectively.
 *
 * @param cfg grammar whose hash code contributes to the cache key.
 * @param editorText text whose lines are examined one by one.
 * @param recognizer predicate deciding whether a line is stored as-is or underlined.
 */
fun preparseParseableLines(cfg: CFG, editorText: Σᐩ, recognizer: (String) -> Boolean) {
  for (line in editorText.lineSequence()) {
    // Skip lines that are blank or still contain a hole.
    if (line.isBlank() || line.containsHole()) continue

    val cacheKey = cfg.hashCode() + line.hashCode()
    segmentationCacheHTML.getOrPut(cacheKey) {
      if (recognizer(line)) {
        println("Recognized $line")
        line
      } else {
        val underlined = "<u>$line</u>"
        // The log line shows the wrapped text, i.e. exactly what is stored in the cache.
        println("Unrecognized $underlined")
        underlined
      }
    }
  }
}

fun getOrComputeSegmentations(cfg: CFG, editorText: Σᐩ): List<Segmentation> =
editorText.split("---").last().lines() // Only preparse the section after the grammar
.filter { it.isNotBlank() && !it.containsHole() }
Expand Down
Loading

0 comments on commit 2838676

Please sign in to comment.