-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Based on the Truncate function. Specify a start point (inclusive) and an end point (exclusive) in cell-width units, and get back a string of exactly that width. Handles ANSI escape codes and wide (multi-cell) characters.
- Loading branch information
1 parent
e9f9f85
commit bdd314f
Showing
3 changed files
with
176 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
package ansi | ||
|
||
import ( | ||
"bytes" | ||
|
||
"github.com/charmbracelet/x/ansi/parser" | ||
"github.com/rivo/uniseg" | ||
) | ||
|
||
// Slice slices a string to a given length, starting from X cell position. | ||
// This function is aware of ANSI escape codes and will not break them, and | ||
// accounts for wide-characters (such as East Asians and emojis). | ||
// | ||
// If a string is cut in the middle of a wide character, padding (in the | ||
// form of spaces) is inserted. This is done in order to maintain the width | ||
// of the input string. | ||
func Slice(s string, start int, end int) string { | ||
if end < start || start == end || s == "" { | ||
return "" | ||
} | ||
|
||
var cluster []byte | ||
var buf bytes.Buffer | ||
curPos := 0 | ||
pstate := parser.GroundState // initial state | ||
b := []byte(s) | ||
|
||
// Here we iterate over the bytes of the string and collect printable | ||
// characters and runes. We also keep track of the scan position in cells. | ||
// Once we reach the given length, we start ignoring characters and only | ||
// collect ANSI escape codes until we reach the end of string. | ||
for i := 0; i < len(b); i++ { | ||
state, action := parser.Table.Transition(pstate, b[i]) | ||
|
||
switch action { | ||
case parser.PrintAction: | ||
// Single/zero width character, fast path | ||
if utf8ByteLen(b[i]) <= 1 { | ||
if curPos >= start && curPos < end { | ||
buf.WriteByte(b[i]) | ||
} | ||
curPos++ | ||
continue | ||
} | ||
|
||
// This action happens when we transition to the Utf8State. | ||
var width int | ||
cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1) | ||
pstate = parser.GroundState | ||
oldPos := curPos | ||
curPos += width | ||
|
||
// When reading multiple characters, we need to advance i further. | ||
// We subtract one, because the loop adds that one by default. | ||
i += len(cluster) - 1 | ||
|
||
// Before scope, skip | ||
if curPos <= start { | ||
continue | ||
} | ||
|
||
// Cut off at beginning, write begin padding | ||
if oldPos < start { | ||
diff := curPos - start | ||
for diff > 0 { | ||
buf.WriteByte(' ') | ||
diff-- | ||
} | ||
continue | ||
} | ||
|
||
// Fits inside perfectly, write | ||
if curPos <= end { | ||
buf.Write(cluster) | ||
continue | ||
} | ||
|
||
// Cut off at end, write end padding | ||
if oldPos < end { | ||
diff := width - (curPos - end) | ||
for diff > 0 { | ||
buf.WriteByte(' ') | ||
diff-- | ||
} | ||
continue | ||
} | ||
|
||
// Beyond scope, skip | ||
|
||
// Always collect ansi codes | ||
default: | ||
buf.WriteByte(b[i]) | ||
} | ||
|
||
// Transition to the next state. | ||
pstate = state | ||
} | ||
|
||
// Ensure width matches requested | ||
if curPos < end-start { | ||
diff := (end - start) - curPos | ||
|
||
for diff > 0 { | ||
buf.WriteByte(' ') | ||
diff-- | ||
} | ||
} | ||
|
||
// Return sliced string | ||
return buf.String() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
package ansi | ||
|
||
import ( | ||
"testing" | ||
) | ||
|
||
func TestSlice(t *testing.T) { | ||
cases := []struct { | ||
name string | ||
input string | ||
start int | ||
end int | ||
expect string | ||
}{ | ||
{"empty", "", 0, 0, ""}, | ||
{"simple", "foobar", 0, 3, "foo"}, | ||
{"passthrough", "foobar", 0, 6, "foobar"}, | ||
{"ascii", "hello", 0, 3, "hel"}, | ||
{"emoji", "👋", 0, 2, "👋"}, | ||
{"wideemoji", "🫧", 0, 2, "🫧"}, | ||
{"controlemoji", "\x1b[31mhello 👋abc\x1b[0m", 0, 8, "\x1b[31mhello 👋\x1b[0m"}, | ||
{"osc8", "\x1b]8;;https://charm.sh\x1b\\Charmbracelet 🫧\x1b]8;;\x1b\\", 0, 5, "\x1b]8;;https://charm.sh\x1b\\Charm\x1b]8;;\x1b\\"}, | ||
{"osc8_8bit", "\x9d8;;https://charm.sh\x9cCharmbracelet 🫧\x9d8;;\x9c", 0, 5, "\x9d8;;https://charm.sh\x9cCharm\x9d8;;\x9c"}, | ||
{"noop", "\x1B[7m--", 0, 2, "\x1B[7m--"}, | ||
{"double_width", "\x1B[38;2;249;38;114m你好\x1B[0m", 0, 3, "\x1B[38;2;249;38;114m你 \x1B[0m"}, | ||
{"double_width_rune", "你", 0, 1, " "}, | ||
{"double_width_runes", "你好", 0, 2, "你"}, | ||
{"spaces_only", " ", 0, 2, " "}, | ||
{"same_width", "foo", 0, 3, "foo"}, | ||
{"style", "I really \x1B[38;2;249;38;114mlove\x1B[0m Go!", 0, 8, "I really\x1B[38;2;249;38;114m\x1B[0m"}, | ||
{"unicode", "\x1b[35mClaire‘s Boutique\x1b[0m", 0, 8, "\x1b[35mClaire‘s\x1b[0m"}, | ||
{"wide_chars", "こんにちは", 0, 7, "こんに "}, | ||
{"style_wide_chars", "\x1b[35mこんにちは\x1b[m", 0, 7, "\x1b[35mこんに \x1b[m"}, | ||
{"osc8_lf", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\", 0, 9, "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nส\x1b]8;;\x1b\\"}, | ||
{"beginning_whitespace", "👋🤭🥳😊👌", 1, 6, " 🤭🥳"}, | ||
{"ending_whitespace", "👋🤭🥳😊👌", 4, 9, "🥳😊 "}, | ||
{"double_whitespace", "👋🤭🥳😊👌", 1, 9, " 🤭🥳😊 "}, | ||
{"width_match", "abc", 0, 5, "abc "}, | ||
} | ||
|
||
for i, c := range cases { | ||
t.Run(c.name, func(t *testing.T) { | ||
result := Slice(c.input, c.start, c.end) | ||
if result != c.expect { | ||
t.Errorf("test case %d failed: expected %q, got %q", i+1, c.expect, result) | ||
} | ||
originalLen := c.end - c.start | ||
resultLen := StringWidth(result) | ||
if originalLen != resultLen { | ||
t.Errorf("test case %d failed: length does not match, expected %d, got %d", i+1, originalLen, resultLen) | ||
} | ||
}) | ||
} | ||
} | ||
|
||
func BenchmarkSliceString(b *testing.B) { | ||
b.RunParallel(func(pb *testing.PB) { | ||
b.ReportAllocs() | ||
b.ResetTimer() | ||
for pb.Next() { | ||
Slice("foo", 1, 2) | ||
} | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters