Skip to content

Commit

Permalink
Allow for styles to persist over lines
Browse files Browse the repository at this point in the history
  • Loading branch information
justin-taylor committed Dec 17, 2024
1 parent f285923 commit 823026e
Show file tree
Hide file tree
Showing 8 changed files with 122 additions and 46 deletions.
51 changes: 25 additions & 26 deletions srt.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {
o = NewSubtitles()
var scanner = bufio.NewScanner(i)

styles := StyleAttributes{}
// Scan
var line string
var lineNum int
Expand All @@ -58,6 +59,10 @@ func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {

// Line contains time boundaries
if strings.Contains(line, srtTimeBoundariesSeparator) {

// reset styles
styles = StyleAttributes{}

// Remove last item of previous subtitle since it should be the index.
// If the last line is empty then the item is missing an index.
var index string
Expand Down Expand Up @@ -118,7 +123,8 @@ func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {
o.Items = append(o.Items, s)
} else {
// Add text
if l := parseTextSrt(strings.TrimSpace(line)); len(l.Items) > 0 {
var l Line
if l, styles = parseTextSrt(line, styles); len(l.Items) > 0 {
s.Lines = append(s.Lines, l)
}
}
Expand All @@ -127,24 +133,18 @@ func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {
}

// parseTextSrt parses the input line to fill the Line
func parseTextSrt(i string) (o Line) {
func parseTextSrt(i string, styles StyleAttributes) (Line, StyleAttributes) {
// special handling needed for empty line
if strings.TrimSpace(i) == "" {
o := Line{}
if i == "" {
o.Items = []LineItem{{Text: ""}}
return
return o, styles
}

// Create tokenizer
tr := html.NewTokenizer(strings.NewReader(i))

// Loop
var (
bold bool
italic bool
underline bool
color *string
pos byte
)
for {
// Get next tag
t := tr.Next()
Expand All @@ -164,39 +164,38 @@ func parseTextSrt(i string) (o Line) {
// Parse italic/bold/underline
switch token.Data {
case "b":
bold = false
styles.SRTBold = false
case "i":
italic = false
styles.SRTItalics = false
case "u":
underline = false
styles.SRTUnderline = false
case "font":
color = nil
styles.SRTColor = nil
}
case html.StartTagToken:
// Parse italic/bold/underline
switch token.Data {
case "b":
bold = true
styles.SRTBold = true
case "i":
italic = true
styles.SRTItalics = true
case "u":
underline = true
styles.SRTUnderline = true
case "font":
if c := htmlTokenAttribute(&token, "color"); c != nil {
color = c
styles.SRTColor = c
}
}
case html.TextToken:
if s := strings.TrimSpace(raw); s != "" {
// Get style attribute
var sa *StyleAttributes
if bold || italic || underline || color != nil || pos != 0 {
if styles.SRTBold || styles.SRTColor != nil || styles.SRTItalics || styles.SRTUnderline {
sa = &StyleAttributes{
SRTBold: bold,
SRTColor: color,
SRTItalics: italic,
SRTPosition: pos,
SRTUnderline: underline,
SRTBold: styles.SRTBold,
SRTColor: styles.SRTColor,
SRTItalics: styles.SRTItalics,
SRTUnderline: styles.SRTUnderline,
}
sa.propagateSRTAttributes()
}
Expand All @@ -209,7 +208,7 @@ func parseTextSrt(i string) (o Line) {
}
}
}
return
return o, styles
}

// formatDurationSRT formats an .srt duration
Expand Down
25 changes: 18 additions & 7 deletions srt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func TestSRTStyled(t *testing.T) {
assert.NoError(t, err)

// assert the items are properly parsed
assert.Len(t, s.Items, 6)
assert.Len(t, s.Items, 9)
assert.Equal(t, 17*time.Second+985*time.Millisecond, s.Items[0].StartAt)
assert.Equal(t, 20*time.Second+521*time.Millisecond, s.Items[0].EndAt)
assert.Equal(t, "[instrumental music]", s.Items[0].Lines[0].String())
Expand All @@ -80,36 +80,47 @@ func TestSRTStyled(t *testing.T) {
assert.Equal(t, 1*time.Minute+31*time.Second+992*time.Millisecond, s.Items[5].EndAt)
assert.Equal(t, "[automated]", s.Items[5].Lines[0].String())
assert.Equal(t, "'The time is 7:35.'", s.Items[5].Lines[1].String())
assert.Equal(t, "Test with multi line italics", s.Items[6].Lines[0].String())
assert.Equal(t, "Terminated on the next line", s.Items[6].Lines[1].String())
assert.Equal(t, "Unterminated styles", s.Items[7].Lines[0].String())
assert.Equal(t, "Do no fall to the next item", s.Items[8].Lines[0].String())

// assert the styles of the items
assert.Len(t, s.Items, 6)
assert.Equal(t, "#00ff00", *s.Items[0].Lines[0].Items[0].InlineStyle.SRTColor)
assert.Zero(t, s.Items[0].Lines[0].Items[0].InlineStyle.SRTPosition)
assert.True(t, s.Items[0].Lines[0].Items[0].InlineStyle.SRTBold)
assert.False(t, s.Items[0].Lines[0].Items[0].InlineStyle.SRTItalics)
assert.False(t, s.Items[0].Lines[0].Items[0].InlineStyle.SRTUnderline)
assert.Equal(t, "#ff00ff", *s.Items[1].Lines[0].Items[0].InlineStyle.SRTColor)
assert.Zero(t, s.Items[1].Lines[0].Items[0].InlineStyle.SRTPosition)
assert.False(t, s.Items[1].Lines[0].Items[0].InlineStyle.SRTBold)
assert.False(t, s.Items[1].Lines[0].Items[0].InlineStyle.SRTItalics)
assert.False(t, s.Items[1].Lines[0].Items[0].InlineStyle.SRTUnderline)
assert.Equal(t, "#00ff00", *s.Items[2].Lines[0].Items[0].InlineStyle.SRTColor)
assert.Zero(t, s.Items[2].Lines[0].Items[0].InlineStyle.SRTPosition)
assert.False(t, s.Items[2].Lines[0].Items[0].InlineStyle.SRTBold)
assert.False(t, s.Items[2].Lines[0].Items[0].InlineStyle.SRTItalics)
assert.False(t, s.Items[2].Lines[0].Items[0].InlineStyle.SRTUnderline)
assert.Nil(t, s.Items[3].Lines[0].Items[0].InlineStyle.SRTColor)
assert.Zero(t, s.Items[3].Lines[0].Items[0].InlineStyle.SRTPosition)
assert.True(t, s.Items[3].Lines[0].Items[0].InlineStyle.SRTBold)
assert.False(t, s.Items[3].Lines[0].Items[0].InlineStyle.SRTItalics)
assert.True(t, s.Items[3].Lines[0].Items[0].InlineStyle.SRTUnderline)
assert.Nil(t, s.Items[4].Lines[0].Items[0].InlineStyle)
assert.Nil(t, s.Items[5].Lines[0].Items[0].InlineStyle)
assert.Nil(t, s.Items[5].Lines[1].Items[0].InlineStyle.SRTColor)
assert.Zero(t, s.Items[5].Lines[1].Items[0].InlineStyle.SRTPosition)
assert.False(t, s.Items[5].Lines[1].Items[0].InlineStyle.SRTBold)
assert.True(t, s.Items[5].Lines[1].Items[0].InlineStyle.SRTItalics)
assert.False(t, s.Items[5].Lines[1].Items[0].InlineStyle.SRTUnderline)
assert.True(t, s.Items[6].Lines[0].Items[0].InlineStyle.SRTItalics)
assert.False(t, s.Items[6].Lines[0].Items[0].InlineStyle.SRTUnderline)
assert.False(t, s.Items[6].Lines[0].Items[0].InlineStyle.SRTBold)
assert.Nil(t, s.Items[6].Lines[0].Items[0].InlineStyle.SRTColor)
assert.True(t, s.Items[6].Lines[1].Items[0].InlineStyle.SRTItalics)
assert.False(t, s.Items[6].Lines[1].Items[0].InlineStyle.SRTUnderline)
assert.False(t, s.Items[6].Lines[1].Items[0].InlineStyle.SRTBold)
assert.Nil(t, s.Items[6].Lines[1].Items[0].InlineStyle.SRTColor)
assert.True(t, s.Items[7].Lines[0].Items[0].InlineStyle.SRTItalics)
assert.False(t, s.Items[7].Lines[0].Items[0].InlineStyle.SRTUnderline)
assert.False(t, s.Items[7].Lines[0].Items[0].InlineStyle.SRTBold)
assert.Nil(t, s.Items[7].Lines[0].Items[0].InlineStyle.SRTColor)
assert.Nil(t, s.Items[8].Lines[0].Items[0].InlineStyle)

// Write to srt
w := &bytes.Buffer{}
Expand Down
13 changes: 13 additions & 0 deletions testdata/example-in-styled.srt
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,16 @@
00:01:29,590 --> 00:01:31,992
[automated]
<i>'The time is 7:35.'</i>

7
00:08:00,000 --> 00:09:00,000
<i>Test with multi line italics
Terminated on the next line</i>

8
00:09:00,000 --> 00:10:00,000
<i>Unterminated styles
9
00:10:00,000 --> 00:11:00,000
Do no fall to the next item
13 changes: 13 additions & 0 deletions testdata/example-out-styled.srt
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,16 @@
00:01:29,590 --> 00:01:31,992
[automated]
<i>'The time is 7:35.'</i>

7
00:08:00,000 --> 00:09:00,000
<i>Test with multi line italics</i>
<i>Terminated on the next line</i>

8
00:09:00,000 --> 00:10:00,000
<i>Unterminated styles</i>

9
00:10:00,000 --> 00:11:00,000
Do no fall to the next item
13 changes: 13 additions & 0 deletions testdata/example-out-styled.vtt
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,16 @@ WEBVTT
00:01:29.590 --> 00:01:31.992
[automated]
<i>'The time is 7:35.'</i>

7
00:08:00.000 --> 00:09:00.000
<i>Test with multi line italics</i>
<i>Terminated on the next line</i>

8
00:09:00.000 --> 00:10:00.000
<i>Unterminated styles</i>

9
00:10:00.000 --> 00:11:00.000
Do no fall to the next item
14 changes: 9 additions & 5 deletions webvtt.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
var scanner = bufio.NewScanner(i)
var line string
var lineNum int
webVTTTagStack := make([]WebVTTTag, 0, 16)

// Skip the header
for scanner.Scan() {
Expand Down Expand Up @@ -167,6 +168,9 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
strings.HasSuffix(webVTTStyles.WebVTTStyles[len(webVTTStyles.WebVTTStyles)-1], "}") {
blockName = ""
}
// Reset tag stack
webVTTTagStack = make([]WebVTTTag, 0, 16)

// Region
case strings.HasPrefix(line, "Region: "):
// Add region styles
Expand Down Expand Up @@ -317,8 +321,9 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
webVTTStyles.WebVTTStyles = append(webVTTStyles.WebVTTStyles, line)
case webvttBlockNameText:
// Parse line
if l := parseTextWebVTT(line); len(l.Items) > 0 {
if l, stack := parseTextWebVTT(line, webVTTTagStack); len(l.Items) > 0 {
item.Lines = append(item.Lines, l)
webVTTTagStack = stack
}
default:
// This is the ID
Expand All @@ -330,11 +335,10 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
}

// parseTextWebVTT parses the input line to fill the Line
func parseTextWebVTT(i string) (o Line) {
func parseTextWebVTT(i string, webVTTTagStack []WebVTTTag) (Line, []WebVTTTag) {
// Create tokenizer
tr := html.NewTokenizer(strings.NewReader(i))

webVTTTagStack := make([]WebVTTTag, 0, 16)
o := Line{}

// Loop
for {
Expand Down Expand Up @@ -400,7 +404,7 @@ func parseTextWebVTT(i string) (o Line) {
o.Items = append(o.Items, parseTextWebVTTTextToken(sa, string(tr.Raw()))...)
}
}
return
return o, webVTTTagStack
}

func parseTextWebVTTTextToken(sa *StyleAttributes, line string) (ret []LineItem) {
Expand Down
12 changes: 6 additions & 6 deletions webvtt_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ func TestParseTextWebVTT(t *testing.T) {
t.Run("When both voice tags are available", func(t *testing.T) {
testData := `<v Bob>Correct tag</v>`

s := parseTextWebVTT(testData)
s, _ := parseTextWebVTT(testData, make([]WebVTTTag, 0, 16))
assert.Equal(t, "Bob", s.VoiceName)
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "Correct tag", s.Items[0].Text)
Expand All @@ -22,7 +22,7 @@ func TestParseTextWebVTT(t *testing.T) {
t.Run("When there is no end tag", func(t *testing.T) {
testData := `<v Bob> Text without end tag`

s := parseTextWebVTT(testData)
s, _ := parseTextWebVTT(testData, make([]WebVTTTag, 0, 16))
assert.Equal(t, "Bob", s.VoiceName)
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "Text without end tag", s.Items[0].Text)
Expand All @@ -31,7 +31,7 @@ func TestParseTextWebVTT(t *testing.T) {
t.Run("When the end tag is correct", func(t *testing.T) {
testData := `<v Bob>Incorrect end tag</vi>`

s := parseTextWebVTT(testData)
s, _ := parseTextWebVTT(testData, make([]WebVTTTag, 0, 16))
assert.Equal(t, "Bob", s.VoiceName)
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "Incorrect end tag", s.Items[0].Text)
Expand All @@ -40,7 +40,7 @@ func TestParseTextWebVTT(t *testing.T) {
t.Run("When inline timestamps are included", func(t *testing.T) {
testData := `<00:01:01.000>With inline <00:01:02.000>timestamps`

s := parseTextWebVTT(testData)
s, _ := parseTextWebVTT(testData, make([]WebVTTTag, 0, 16))
assert.Equal(t, 2, len(s.Items))
assert.Equal(t, "With inline", s.Items[0].Text)
assert.Equal(t, time.Minute+time.Second, s.Items[0].StartAt)
Expand All @@ -51,7 +51,7 @@ func TestParseTextWebVTT(t *testing.T) {
t.Run("When inline timestamps together", func(t *testing.T) {
testData := `<00:01:01.000><00:01:02.000>With timestamp tags together`

s := parseTextWebVTT(testData)
s, _ := parseTextWebVTT(testData, make([]WebVTTTag, 0, 16))
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "With timestamp tags together", s.Items[0].Text)
assert.Equal(t, time.Minute+2*time.Second, s.Items[0].StartAt)
Expand All @@ -60,7 +60,7 @@ func TestParseTextWebVTT(t *testing.T) {
t.Run("When inline timestamps is at end", func(t *testing.T) {
testData := `With end timestamp<00:01:02.000>`

s := parseTextWebVTT(testData)
s, _ := parseTextWebVTT(testData, make([]WebVTTTag, 0, 16))
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "With end timestamp", s.Items[0].Text)
assert.Equal(t, time.Duration(0), s.Items[0].StartAt)
Expand Down
27 changes: 25 additions & 2 deletions webvtt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,22 @@ func TestWebVTTTags(t *testing.T) {
<v Joe>Joe says something</v> <v Bob>Bob says something</v>
00:06:00.000 --> 00:07:00.000
Text with a <00:06:30.000>timestamp in the middle`
Text with a <00:06:30.000>timestamp in the middle
00:08:00.000 --> 00:09:00.000
<i>Test with multi line italics
Terminated on the next line</i>
00:09:00.000 --> 00:10:00.000
<i>Unterminated styles
00:10:00.000 --> 00:11:00.000
Do no fall to the next item`

s, err := astisub.ReadFromWebVTT(strings.NewReader(testData))
require.NoError(t, err)

require.Len(t, s.Items, 6)
require.Len(t, s.Items, 9)

b := &bytes.Buffer{}
err = s.WriteToWebVTT(b)
Expand Down Expand Up @@ -199,5 +209,18 @@ func TestWebVTTTags(t *testing.T) {
6
00:06:00.000 --> 00:07:00.000
Text with a <00:06:30.000>timestamp in the middle
7
00:08:00.000 --> 00:09:00.000
<i>Test with multi line italics</i>
<i>Terminated on the next line</i>
8
00:09:00.000 --> 00:10:00.000
<i>Unterminated styles</i>
9
00:10:00.000 --> 00:11:00.000
Do no fall to the next item
`, b.String())
}

0 comments on commit 823026e

Please sign in to comment.