feat: added ansi slice function

Based on the Truncate function. Specify a start point (inclusive) and an end point (exclusive) in cell-width units, and get back a string of exactly that width. Handles ANSI escape codes and wide (multi-cell) characters.
charmbracelet · Jun 24, 2024 · bdd314f · bdd314f
1 parent e9f9f85
commit bdd314f
Show file tree

Hide file tree

Showing 3 changed files with 176 additions and 0 deletions.
diff --git a/ansi/slice.go b/ansi/slice.go
@@ -0,0 +1,111 @@
+package ansi
+
+import (
+	"bytes"
+
+	"github.com/charmbracelet/x/ansi/parser"
+	"github.com/rivo/uniseg"
+)
+
+// Slice returns the part of s that occupies the cells from start (inclusive)
+// to end (exclusive), measured in terminal cell-width units.
+//
+// This function is aware of ANSI escape codes and will not break them: every
+// byte that is not a printable character is copied to the result, wherever
+// it appears in s. It also accounts for wide characters (such as East Asian
+// characters and emojis).
+//
+// If the requested range cuts through the middle of a wide character, the
+// visible part of that character is replaced with spaces. If s ends before
+// end, the result is padded with trailing spaces. Both are done so that the
+// result is end-start cells wide.
+//
+// An empty string is returned when s is empty or when end <= start.
+func Slice(s string, start int, end int) string {
+	if end <= start || s == "" {
+		return ""
+	}
+
+	var cluster []byte
+	var buf bytes.Buffer
+	curPos := 0
+	pstate := parser.GroundState // initial state
+	b := []byte(s)
+
+	// pad appends n spaces to the output; it does nothing when n <= 0.
+	pad := func(n int) {
+		for ; n > 0; n-- {
+			buf.WriteByte(' ')
+		}
+	}
+
+	// Here we iterate over the bytes of the string and collect printable
+	// characters and runes. We also keep track of the scan position in cells.
+	// Printable content outside [start, end) is dropped, while everything
+	// else (escape sequences, control bytes) is always copied.
+	for i := 0; i < len(b); i++ {
+		state, action := parser.Table.Transition(pstate, b[i])
+
+		switch action {
+		case parser.PrintAction:
+			// Single-byte character, fast path: it is counted as one cell.
+			if utf8ByteLen(b[i]) <= 1 {
+				if curPos >= start && curPos < end {
+					buf.WriteByte(b[i])
+				}
+				curPos++
+				continue
+			}
+
+			// Multi-byte character: consume the whole grapheme cluster at
+			// once and put the parser back in the ground state.
+			var width int
+			cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
+			pstate = parser.GroundState
+			oldPos := curPos
+			curPos += width
+
+			// When reading multiple characters, we need to advance i further.
+			// We subtract one, because the loop adds that one by default.
+			i += len(cluster) - 1
+
+			switch {
+			case curPos <= start:
+				// Before scope, skip.
+			case oldPos < start:
+				// Cut off at beginning, write begin padding. The visible
+				// part stops at end if the cluster also reaches past it.
+				visibleEnd := curPos
+				if visibleEnd > end {
+					visibleEnd = end
+				}
+				pad(visibleEnd - start)
+			case curPos <= end:
+				// Fits inside perfectly, write.
+				buf.Write(cluster)
+			case oldPos < end:
+				// Cut off at end, write end padding.
+				pad(end - oldPos)
+			}
+
+			// Every cluster path must skip the state assignment below: the
+			// cluster has been fully consumed, so the state returned for its
+			// first byte must not overwrite the ground state set above.
+			continue
+
+		// Always collect ansi codes
+		default:
+			buf.WriteByte(b[i])
+		}
+
+		// Transition to the next state.
+		pstate = state
+	}
+
+	// Ensure width matches requested. curPos is the total width of s, so the
+	// missing cells are those between the end of s (or start, when s ends
+	// before the range begins) and end.
+	if curPos < end {
+		from := curPos
+		if from < start {
+			from = start
+		}
+		pad(end - from)
+	}
+
+	// Return sliced string
+	return buf.String()
+}
diff --git a/ansi/slice_test.go b/ansi/slice_test.go
@@ -0,0 +1,64 @@
+package ansi
+
+import (
+	"testing"
+)
+
+func TestSlice(t *testing.T) {
+	cases := []struct {
+		name   string
+		input  string
+		start  int
+		end    int
+		expect string
+	}{
+		{"empty", "", 0, 0, ""},
+		{"simple", "foobar", 0, 3, "foo"},
+		{"passthrough", "foobar", 0, 6, "foobar"},
+		{"ascii", "hello", 0, 3, "hel"},
+		{"emoji", "👋", 0, 2, "👋"},
+		{"wideemoji", "🫧", 0, 2, "🫧"},
+		{"controlemoji", "\x1b[31mhello 👋abc\x1b[0m", 0, 8, "\x1b[31mhello 👋\x1b[0m"},
+		{"osc8", "\x1b]8;;https://charm.sh\x1b\\Charmbracelet 🫧\x1b]8;;\x1b\\", 0, 5, "\x1b]8;;https://charm.sh\x1b\\Charm\x1b]8;;\x1b\\"},
+		{"osc8_8bit", "\x9d8;;https://charm.sh\x9cCharmbracelet 🫧\x9d8;;\x9c", 0, 5, "\x9d8;;https://charm.sh\x9cCharm\x9d8;;\x9c"},
+		{"noop", "\x1B[7m--", 0, 2, "\x1B[7m--"},
+		{"double_width", "\x1B[38;2;249;38;114m你好\x1B[0m", 0, 3, "\x1B[38;2;249;38;114m你 \x1B[0m"},
+		{"double_width_rune", "你", 0, 1, " "},
+		{"double_width_runes", "你好", 0, 2, "你"},
+		{"spaces_only", "    ", 0, 2, "  "},
+		{"same_width", "foo", 0, 3, "foo"},
+		{"style", "I really \x1B[38;2;249;38;114mlove\x1B[0m Go!", 0, 8, "I really\x1B[38;2;249;38;114m\x1B[0m"},
+		{"unicode", "\x1b[35mClaire‘s Boutique\x1b[0m", 0, 8, "\x1b[35mClaire‘s\x1b[0m"},
+		{"wide_chars", "こんにちは", 0, 7, "こんに "},
+		{"style_wide_chars", "\x1b[35mこんにちは\x1b[m", 0, 7, "\x1b[35mこんに \x1b[m"},
+		{"osc8_lf", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\", 0, 9, "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nส\x1b]8;;\x1b\\"},
+		{"beginning_whitespace", "👋🤭🥳😊👌", 1, 6, " 🤭🥳"},
+		{"ending_whitespace", "👋🤭🥳😊👌", 4, 9, "🥳😊 "},
+		{"double_whitespace", "👋🤭🥳😊👌", 1, 9, " 🤭🥳😊 "},
+		{"width_match", "abc", 0, 5, "abc  "},
+	}
+
+	for i, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			result := Slice(c.input, c.start, c.end)
+			if result != c.expect {
+				t.Errorf("test case %d failed: expected %q, got %q", i+1, c.expect, result)
+			}
+			originalLen := c.end - c.start
+			resultLen := StringWidth(result)
+			if originalLen != resultLen {
+				t.Errorf("test case %d failed: length does not match, expected %d, got %d", i+1, originalLen, resultLen)
+			}
+		})
+	}
+}
+
+// BenchmarkSliceString measures Slice on a short ASCII string, running the
+// call concurrently via RunParallel and reporting allocations.
+func BenchmarkSliceString(b *testing.B) {
+	// Benchmark-wide settings are applied once, before the parallel section.
+	// The RunParallel body runs in every goroutine and must not call
+	// ResetTimer, which has global effect; there is no setup work to exclude
+	// from the timing, so no timer reset is needed at all.
+	b.ReportAllocs()
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			Slice("foo", 1, 2)
+		}
+	})
+}
diff --git a/go.work.sum b/go.work.sum
@@ -28,6 +28,7 @@ golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE=
 golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY=
 golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk=
 golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0=
+golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
 golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
 golang.org/x/tools v0.1.12 h1:VveCTK38A2rkS8ZqFY25HIDFscX5X9OoEhJd3quQmXU=
 golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=