diff --git a/srt.go b/srt.go
index 97970b9..8abca3b 100644
--- a/srt.go
+++ b/srt.go
@@ -4,9 +4,12 @@ import (
"bufio"
"fmt"
"io"
+ "regexp"
"strconv"
"strings"
"time"
+
+ "golang.org/x/net/html"
)
// Constants
@@ -17,6 +20,7 @@ const (
// Vars
var (
bytesSRTTimeBoundariesSeparator = []byte(srtTimeBoundariesSeparator)
+ regexpSRTSSATags = regexp.MustCompile(`{\\.*?}`)
)
// parseDurationSRT parses an .srt duration
@@ -111,12 +115,134 @@ func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {
o.Items = append(o.Items, s)
} else {
// Add text
- s.Lines = append(s.Lines, Line{Items: []LineItem{{Text: strings.TrimSpace(line)}}})
+ if l := parseTextSrt(line); len(l.Items) > 0 {
+ s.Lines = append(s.Lines, l)
+ }
}
}
return
}
+// parseTextSrt parses one line of SRT cue text into a Line: it tokenizes the line as HTML, tracks the open b/i/u/font tags and emits one LineItem per non-blank text token
+func parseTextSrt(i string) (o Line) {
+ // Whitespace-only input yields a single item with empty text instead of a Line with no items
+ if strings.TrimSpace(i) == "" {
+ o.Items = []LineItem{{Text: ""}}
+ return
+ }
+
+ // Create tokenizer
+ tr := html.NewTokenizer(strings.NewReader(i))
+
+ // Loop, carrying the formatting state from one token to the next
+ var (
+ bold bool
+ italic bool
+ underline bool
+ color *string
+ pos byte // set through removeSSATagsWithPos when an {\anN} tag is seen; never reset inside this function
+ )
+ for {
+ // Get next tag
+ t := tr.Next()
+
+ // Any tokenizer error ends the loop and is discarded (presumably this is also how end of input is reported; see the x/net/html Tokenizer docs)
+ if err := tr.Err(); err != nil {
+ break
+ }
+
+ // Get the bytes of the current token as the tokenizer's Raw() reports them
+ raw := string(tr.Raw())
+ // Get current token
+ token := tr.Token()
+
+ switch t {
+ case html.EndTagToken:
+ // A closing b/i/u tag switches that style off; a closing font tag drops the color
+ switch token.Data {
+ case "b":
+ bold = false
+ case "i":
+ italic = false
+ case "u":
+ underline = false
+ case "font":
+ color = nil
+ }
+ case html.StartTagToken:
+ // An opening b/i/u tag switches that style on; an opening font tag may set the color
+ switch token.Data {
+ case "b":
+ bold = true
+ case "i":
+ italic = true
+ case "u":
+ underline = true
+ case "font":
+ if c := htmlTokenAttribute(&token, "color"); c != nil { // a font tag without a color attribute keeps the current color
+ color = c
+ }
+ }
+ case html.TextToken:
+ if s := strings.TrimSpace(raw); s != "" { // whitespace-only text tokens are skipped
+ // Strip SSA/ASS tags matched by regexpSRTSSATags; an {\an1}..{\an9} tag also updates pos (this s shadows the outer s)
+ s := regexpSRTSSATags.ReplaceAllStringFunc(s, removeSSATagsWithPos(&pos))
+ // Build a style only when at least one attribute is set; otherwise InlineStyle stays nil
+ var sa *StyleAttributes
+ if bold || italic || underline || color != nil || pos != 0 {
+ sa = &StyleAttributes{
+ SRTBold: bold,
+ SRTColor: color,
+ SRTItalics: italic,
+ SRTPosition: pos,
+ SRTUnderline: underline,
+ }
+ sa.propagateSRTAttributes()
+ }
+
+ // Append item. NOTE(review): s is not trimmed or checked again after tag removal, so a token holding only SSA tags gives an item with empty Text
+ o.Items = append(o.Items, LineItem{
+ InlineStyle: sa,
+ Text: s,
+ })
+ }
+ }
+ }
+ return
+}
+
+// removeSSATagsWithPos returns a replacement callback that turns every matched SSA/ASS tag into the empty string
+// and, when the match is exactly {\an1}..{\an9}, stores that numpad position in *pos (any other match leaves *pos untouched)
+func removeSSATagsWithPos(pos *byte) func(string) string {
+ return func(i string) string {
+ // Based on the following information:
+ // https://superuser.com/a/1228528
+ switch i {
+ case `{\an7}`: // top-left
+ *pos = 7
+ case `{\an8}`: // top-center
+ *pos = 8
+ case `{\an9}`: // top-right
+ *pos = 9
+ case `{\an4}`: // middle-left
+ *pos = 4
+ case `{\an5}`: // middle-center
+ *pos = 5
+ case `{\an6}`: // middle-right
+ *pos = 6
+ case `{\an1}`: // bottom-left
+ *pos = 1
+ case `{\an2}`: // bottom-center
+ *pos = 2
+ case `{\an3}`: // bottom-right
+ *pos = 3
+ }
+
+ // The tag is always removed from the subtitle text, whether or not it carried a position
+ return ""
+ }
+}
+
// formatDurationSRT formats an .srt duration
func formatDurationSRT(i time.Duration) string {
return formatDuration(i, ",", 3)
@@ -146,8 +272,7 @@ func (s Subtitles) WriteToSRT(o io.Writer) (err error) {
// Loop through lines
for _, l := range v.Lines {
- c = append(c, []byte(l.String())...)
- c = append(c, bytesLineSeparator...)
+ c = append(c, l.srtBytes()...)
}
// Add new line
@@ -164,3 +289,65 @@ func (s Subtitles) WriteToSRT(o io.Writer) (err error) {
}
return
}
+
+func (l Line) srtBytes() (c []byte) { // srtBytes renders the line for SRT output: the bytes of each item joined by single spaces, followed by bytesLineSeparator
+ for idx, li := range l.Items {
+ c = append(c, li.srtBytes()...)
+ // Separate items with one space, but do not leave a trailing space after the last item
+ if idx < len(l.Items)-1 {
+ c = append(c, []byte(" ")...)
+ }
+ }
+ c = append(c, bytesLineSeparator...)
+ return
+}
+
+func (li LineItem) srtBytes() (c []byte) { // srtBytes renders the item as SRT text wrapped in the font/b/i/u tags and the {\anN} position tag that parseTextSrt reads back; a nil InlineStyle yields the bare text
+ // Get color
+ var color string
+ if li.InlineStyle != nil && li.InlineStyle.SRTColor != nil {
+ color = *li.InlineStyle.SRTColor
+ }
+
+ // Get bold/italics/underline
+ b := li.InlineStyle != nil && li.InlineStyle.SRTBold
+ i := li.InlineStyle != nil && li.InlineStyle.SRTItalics
+ u := li.InlineStyle != nil && li.InlineStyle.SRTUnderline
+
+ // Get position (0 means no position tag is written)
+ var pos byte
+ if li.InlineStyle != nil {
+ pos = li.InlineStyle.SRTPosition
+ }
+
+ // Append opening tags from the outermost (font) to the innermost (u), then the position tag right before the text
+ if color != "" {
+ c = append(c, []byte("<font color=\""+color+"\">")...)
+ }
+ if b {
+ c = append(c, []byte("<b>")...)
+ }
+ if i {
+ c = append(c, []byte("<i>")...)
+ }
+ if u {
+ c = append(c, []byte("<u>")...)
+ }
+ if pos != 0 {
+ c = append(c, []byte(fmt.Sprintf(`{\an%d}`, pos))...)
+ }
+ c = append(c, []byte(li.Text)...) // closing tags below are written in the reverse order of the opening ones so the markup nests properly
+ if u {
+ c = append(c, []byte("</u>")...)
+ }
+ if i {
+ c = append(c, []byte("</i>")...)
+ }
+ if b {
+ c = append(c, []byte("</b>")...)
+ }
+ if color != "" {
+ c = append(c, []byte("</font>")...)
+ }
+ return
+}
diff --git a/srt_test.go b/srt_test.go
index f44fbbd..aa81697 100644
--- a/srt_test.go
+++ b/srt_test.go
@@ -46,3 +46,23 @@ func TestSRTMissingSequence(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, string(c), w.String())
}
+
+func TestSRTStyled(t *testing.T) { // TestSRTStyled parses the styled .srt fixture, checks its items and inline styles, then writes it back and compares with the expected output fixture
+ // Open the styled input and check timings/text as well as the per-item SRT styles
+ s, err := astisub.OpenFile("./testdata/example-styled-in.srt")
+ assert.NoError(t, err)
+ assertStyledSubtitleItems(t, s)
+ assertSRTSubtitleStyles(t, s)
+
+ // Writing an empty Subtitles value must fail with ErrNoSubtitlesToWrite
+ w := &bytes.Buffer{}
+ err = astisub.Subtitles{}.WriteToSRT(w)
+ assert.EqualError(t, err, astisub.ErrNoSubtitlesToWrite.Error())
+
+ // Write the parsed subtitles into the same buffer and compare with the expected file content
+ c, err := ioutil.ReadFile("./testdata/example-styled-out.srt")
+ assert.NoError(t, err)
+ err = s.WriteToSRT(w)
+ assert.NoError(t, err)
+ assert.Equal(t, string(c), w.String())
+}
diff --git a/subtitles.go b/subtitles.go
index 3225979..727ba07 100644
--- a/subtitles.go
+++ b/subtitles.go
@@ -12,6 +12,7 @@ import (
"time"
"github.com/asticode/go-astikit"
+ "golang.org/x/net/html"
)
// Bytes
@@ -173,6 +174,11 @@ var (
// StyleAttributes represents style attributes
type StyleAttributes struct {
+ SRTBold bool
+ SRTColor *string
+ SRTItalics bool
+ SRTPosition byte // 1-9 numpad layout
+ SRTUnderline bool
SSAAlignment *int
SSAAlphaLevel *float64
SSAAngle *float64 // degrees
@@ -236,7 +242,9 @@ type StyleAttributes struct {
TTMLWritingMode *string
TTMLZIndex *int
WebVTTAlign string
+ WebVTTBold bool
WebVTTItalics bool
+ WebVTTUnderline bool
WebVTTLine string
WebVTTLines int
WebVTTPosition string
@@ -248,6 +256,45 @@ type StyleAttributes struct {
WebVTTWidth string
}
+func (sa *StyleAttributes) propagateSRTAttributes() { // propagateSRTAttributes copies the SRT* style fields into their TTML/WebVTT counterparts
+ // Copy relevant attrs to the TTML/WebVTT ones; the color is stored in TTMLColor and is left alone when SRTColor is nil
+ if sa.SRTColor != nil {
+ // TODO: handle non-default colors that need custom styles
+ sa.TTMLColor = sa.SRTColor
+ }
+
+ switch sa.SRTPosition { // numpad layout: the left/right columns set WebVTTAlign, the row sets WebVTTPosition (top 10%, middle 50%, bottom 90%); any other value changes nothing. NOTE(review): confirm WebVTTPosition is the intended field for the row
+ case 7: // top-left
+ sa.WebVTTAlign = "left"
+ sa.WebVTTPosition = "10%"
+ case 8: // top-center
+ sa.WebVTTPosition = "10%"
+ case 9: // top-right
+ sa.WebVTTAlign = "right"
+ sa.WebVTTPosition = "10%"
+ case 4: // middle-left
+ sa.WebVTTAlign = "left"
+ sa.WebVTTPosition = "50%"
+ case 5: // middle-center
+ sa.WebVTTPosition = "50%"
+ case 6: // middle-right
+ sa.WebVTTAlign = "right"
+ sa.WebVTTPosition = "50%"
+ case 1: // bottom-left
+ sa.WebVTTAlign = "left"
+ sa.WebVTTPosition = "90%"
+ case 2: // bottom-center
+ sa.WebVTTPosition = "90%"
+ case 3: // bottom-right
+ sa.WebVTTAlign = "right"
+ sa.WebVTTPosition = "90%"
+ }
+
+ sa.WebVTTBold = sa.SRTBold // the three flags are always overwritten, including with false
+ sa.WebVTTItalics = sa.SRTItalics
+ sa.WebVTTUnderline = sa.SRTUnderline
+}
+
func (sa *StyleAttributes) propagateSSAAttributes() {}
func (sa *StyleAttributes) propagateSTLAttributes() {
@@ -321,7 +368,15 @@ func (sa *StyleAttributes) propagateTTMLAttributes() {
}
}
-func (sa *StyleAttributes) propagateWebVTTAttributes() {}
+func (sa *StyleAttributes) propagateWebVTTAttributes() { // propagateWebVTTAttributes mirrors TTMLColor and the WebVTT bold/italics/underline flags into the SRT* fields
+ // Copy relevant attrs to the SRT ones: SRTColor is only overwritten when TTMLColor is set
+ if sa.TTMLColor != nil {
+ sa.SRTColor = sa.TTMLColor
+ }
+ sa.SRTBold = sa.WebVTTBold // the three flags are always overwritten, including with false
+ sa.SRTItalics = sa.WebVTTItalics
+ sa.SRTUnderline = sa.WebVTTUnderline
+}
// Metadata represents metadata
// TODO Merge attributes
@@ -802,3 +857,14 @@ func appendStringToBytesWithNewLine(i []byte, s string) (o []byte) {
o = append(o, bytesLineSeparator...)
return
}
+
+func htmlTokenAttribute(t *html.Token, key string) *string { // htmlTokenAttribute returns a pointer to the value of the first attribute of t whose Key equals key, or nil when there is none
+
+ for _, attr := range t.Attr {
+ if attr.Key == key {
+ return &attr.Val // attr is the range copy, so the returned pointer does not alias the element inside t.Attr
+ }
+ }
+
+ return nil
+}
diff --git a/subtitles_test.go b/subtitles_test.go
index a8012ba..84597b1 100644
--- a/subtitles_test.go
+++ b/subtitles_test.go
@@ -40,6 +40,64 @@ func assertSubtitleItems(t *testing.T, i *astisub.Subtitles) {
assert.Equal(t, "electronic melody)", i.Items[5].Lines[1].String())
}
+func assertStyledSubtitleItems(t *testing.T, i *astisub.Subtitles) { // assertStyledSubtitleItems checks that i holds six items with the expected start/end times and line text as returned by Line.String()
+ assert.Len(t, i.Items, 6)
+ assert.Equal(t, 17*time.Second+985*time.Millisecond, i.Items[0].StartAt)
+ assert.Equal(t, 20*time.Second+521*time.Millisecond, i.Items[0].EndAt)
+ assert.Equal(t, "[instrumental music]", i.Items[0].Lines[0].String())
+ assert.Equal(t, 47*time.Second+115*time.Millisecond, i.Items[1].StartAt)
+ assert.Equal(t, 48*time.Second+282*time.Millisecond, i.Items[1].EndAt)
+ assert.Equal(t, "[ticks]", i.Items[1].Lines[0].String())
+ assert.Equal(t, 58*time.Second+192*time.Millisecond, i.Items[2].StartAt)
+ assert.Equal(t, 59*time.Second+727*time.Millisecond, i.Items[2].EndAt)
+ assert.Equal(t, "[instrumental music]", i.Items[2].Lines[0].String())
+ assert.Equal(t, 1*time.Minute+1*time.Second+662*time.Millisecond, i.Items[3].StartAt)
+ assert.Equal(t, 1*time.Minute+3*time.Second+63*time.Millisecond, i.Items[3].EndAt)
+ assert.Equal(t, "[dog barking]", i.Items[3].Lines[0].String())
+ assert.Equal(t, 1*time.Minute+26*time.Second+787*time.Millisecond, i.Items[4].StartAt)
+ assert.Equal(t, 1*time.Minute+29*time.Second+523*time.Millisecond, i.Items[4].EndAt)
+ assert.Equal(t, "[beeping]", i.Items[4].Lines[0].String())
+ assert.Equal(t, 1*time.Minute+29*time.Second+590*time.Millisecond, i.Items[5].StartAt)
+ assert.Equal(t, 1*time.Minute+31*time.Second+992*time.Millisecond, i.Items[5].EndAt)
+ assert.Equal(t, "[automated]", i.Items[5].Lines[0].String()) // the last item is the only one checked with two lines
+ assert.Equal(t, "'The time is 7:35.'", i.Items[5].Lines[1].String())
+}
+
+func assertSRTSubtitleStyles(t *testing.T, i *astisub.Subtitles) { // assertSRTSubtitleStyles checks the SRT inline style (color, position, bold, italics, underline) of the first LineItem of each checked line across the six items
+ assert.Len(t, i.Items, 6)
+ assert.Equal(t, "#00ff00", *i.Items[0].Lines[0].Items[0].InlineStyle.SRTColor) // item 0: colored and bold
+ assert.Zero(t, i.Items[0].Lines[0].Items[0].InlineStyle.SRTPosition)
+ assert.True(t, i.Items[0].Lines[0].Items[0].InlineStyle.SRTBold)
+ assert.False(t, i.Items[0].Lines[0].Items[0].InlineStyle.SRTItalics)
+ assert.False(t, i.Items[0].Lines[0].Items[0].InlineStyle.SRTUnderline)
+ assert.Equal(t, "#ffffff", *i.Items[1].Lines[0].Items[0].InlineStyle.SRTColor) // item 1: color only
+ assert.Zero(t, i.Items[1].Lines[0].Items[0].InlineStyle.SRTPosition)
+ assert.False(t, i.Items[1].Lines[0].Items[0].InlineStyle.SRTBold)
+ assert.False(t, i.Items[1].Lines[0].Items[0].InlineStyle.SRTItalics)
+ assert.False(t, i.Items[1].Lines[0].Items[0].InlineStyle.SRTUnderline)
+ assert.Equal(t, "#00ff00", *i.Items[2].Lines[0].Items[0].InlineStyle.SRTColor) // item 2: color only
+ assert.Zero(t, i.Items[2].Lines[0].Items[0].InlineStyle.SRTPosition)
+ assert.False(t, i.Items[2].Lines[0].Items[0].InlineStyle.SRTBold)
+ assert.False(t, i.Items[2].Lines[0].Items[0].InlineStyle.SRTItalics)
+ assert.False(t, i.Items[2].Lines[0].Items[0].InlineStyle.SRTUnderline)
+ assert.Nil(t, i.Items[3].Lines[0].Items[0].InlineStyle.SRTColor) // item 3: bold and underline, no color
+ assert.Zero(t, i.Items[3].Lines[0].Items[0].InlineStyle.SRTPosition)
+ assert.True(t, i.Items[3].Lines[0].Items[0].InlineStyle.SRTBold)
+ assert.False(t, i.Items[3].Lines[0].Items[0].InlineStyle.SRTItalics)
+ assert.True(t, i.Items[3].Lines[0].Items[0].InlineStyle.SRTUnderline)
+ assert.Nil(t, i.Items[4].Lines[0].Items[0].InlineStyle.SRTColor) // item 4: position 8 only
+ assert.Equal(t, byte(8), i.Items[4].Lines[0].Items[0].InlineStyle.SRTPosition)
+ assert.False(t, i.Items[4].Lines[0].Items[0].InlineStyle.SRTBold)
+ assert.False(t, i.Items[4].Lines[0].Items[0].InlineStyle.SRTItalics)
+ assert.False(t, i.Items[4].Lines[0].Items[0].InlineStyle.SRTUnderline)
+ assert.Nil(t, i.Items[5].Lines[0].Items[0].InlineStyle) // item 5, first line: no inline style at all
+ assert.Nil(t, i.Items[5].Lines[1].Items[0].InlineStyle.SRTColor) // item 5, second line: italics only
+ assert.Zero(t, i.Items[5].Lines[1].Items[0].InlineStyle.SRTPosition)
+ assert.False(t, i.Items[5].Lines[1].Items[0].InlineStyle.SRTBold)
+ assert.True(t, i.Items[5].Lines[1].Items[0].InlineStyle.SRTItalics)
+ assert.False(t, i.Items[5].Lines[1].Items[0].InlineStyle.SRTUnderline)
+}
+
func mockSubtitles() *astisub.Subtitles {
return &astisub.Subtitles{Items: []*astisub.Item{{EndAt: 3 * time.Second, StartAt: time.Second, Lines: []astisub.Line{{Items: []astisub.LineItem{{Text: "subtitle-1"}}}}}, {EndAt: 7 * time.Second, StartAt: 3 * time.Second, Lines: []astisub.Line{{Items: []astisub.LineItem{{Text: "subtitle-2"}}}}}}}
}
diff --git a/testdata/example-styled-in.srt b/testdata/example-styled-in.srt
new file mode 100644
index 0000000..b874a40
--- /dev/null
+++ b/testdata/example-styled-in.srt
@@ -0,0 +1,24 @@
+1
+00:00:17,985 --> 00:00:20,521
+[instrumental music]
+
+2
+00:00:47,115 --> 00:00:48,282
+[ticks]
+
+3
+00:00:58,192 --> 00:00:59,727
+[instrumental music]
+
+4
+00:01:01,662 --> 00:01:03,063
+[dog barking]
+
+5
+00:01:26,787 --> 00:01:29,523
+{\an8}[beeping]
+
+6
+00:01:29,590 --> 00:01:31,992
+[automated]
+'The time is 7:35.'
diff --git a/testdata/example-styled-out.srt b/testdata/example-styled-out.srt
new file mode 100644
index 0000000..70ffc70
--- /dev/null
+++ b/testdata/example-styled-out.srt
@@ -0,0 +1,24 @@
+1
+00:00:17,985 --> 00:00:20,521
+[instrumental music]
+
+2
+00:00:47,115 --> 00:00:48,282
+[ticks]
+
+3
+00:00:58,192 --> 00:00:59,727
+[instrumental music]
+
+4
+00:01:01,662 --> 00:01:03,063
+[dog barking]
+
+5
+00:01:26,787 --> 00:01:29,523
+{\an8}[beeping]
+
+6
+00:01:29,590 --> 00:01:31,992
+[automated]
+'The time is 7:35.'
diff --git a/webvtt.go b/webvtt.go
index b153469..ebe7150 100644
--- a/webvtt.go
+++ b/webvtt.go
@@ -2,7 +2,6 @@ package astisub
import (
"bufio"
- "bytes"
"errors"
"fmt"
"io"
@@ -29,8 +28,6 @@ const (
// Vars
var (
- bytesWebVTTItalicEndTag = []byte("")
- bytesWebVTTItalicStartTag = []byte("")
bytesWebVTTTimeBoundariesSeparator = []byte(webvttTimeBoundariesSeparator)
webVTTRegexpStartTag = regexp.MustCompile(`()`)
webVTTEscaper = strings.NewReplacer("&", "&", "<", "<")
@@ -290,7 +287,11 @@ func parseTextWebVTT(i string) (o Line) {
tr := html.NewTokenizer(strings.NewReader(i))
// Loop
- italic := false
+ var (
+ bold bool
+ italic bool
+ underline bool
+ )
for {
// Get next tag
t := tr.Next()
@@ -300,34 +301,49 @@ func parseTextWebVTT(i string) (o Line) {
break
}
+ // Get unmodified text
+ raw := string(tr.Raw())
+ // Get current token
+ token := tr.Token()
+
switch t {
case html.EndTagToken:
- // Parse italic
- if bytes.Equal(tr.Raw(), bytesWebVTTItalicEndTag) {
+ // Parse italic/bold/underline
+ switch token.Data {
+ case "b":
+ bold = false
+ case "i":
italic = false
- continue
+ case "u":
+ underline = false
}
case html.StartTagToken:
// Parse voice name
- if matches := webVTTRegexpStartTag.FindStringSubmatch(string(tr.Raw())); len(matches) > 3 {
+ if matches := webVTTRegexpStartTag.FindStringSubmatch(raw); len(matches) > 3 {
if s := strings.TrimSpace(matches[3]); s != "" {
o.VoiceName = s
}
continue
}
- // Parse italic
- if bytes.Equal(tr.Raw(), bytesWebVTTItalicStartTag) {
+ // Parse italic/bold/underline
+ switch token.Data {
+ case "b":
+ bold = true
+ case "i":
italic = true
- continue
+ case "u":
+ underline = true
}
case html.TextToken:
- if s := strings.TrimSpace(string(tr.Raw())); s != "" {
+ if s := strings.TrimSpace(raw); s != "" {
// Get style attribute
var sa *StyleAttributes
- if italic {
+ if bold || italic || underline {
sa = &StyleAttributes{
- WebVTTItalics: italic,
+ WebVTTBold: bold,
+ WebVTTItalics: italic,
+ WebVTTUnderline: underline,
}
sa.propagateWebVTTAttributes()
}
@@ -516,20 +532,34 @@ func (li LineItem) webVTTBytes() (c []byte) {
color = cssColor(*li.InlineStyle.TTMLColor)
}
- // Get italics
+ // Get bold/italics/underline
+ b := li.InlineStyle != nil && li.InlineStyle.WebVTTBold
i := li.InlineStyle != nil && li.InlineStyle.WebVTTItalics
+ u := li.InlineStyle != nil && li.InlineStyle.WebVTTUnderline
// Append
if color != "" {
c = append(c, []byte("")...)
}
+ if b {
+ c = append(c, []byte("")...)
+ }
if i {
c = append(c, []byte("")...)
}
+ if u {
+ c = append(c, []byte("")...)
+ }
c = append(c, []byte(escapeWebVTT(li.Text))...)
+ if u {
+ c = append(c, []byte("")...)
+ }
if i {
c = append(c, []byte("")...)
}
+ if b {
+ c = append(c, []byte("")...)
+ }
if color != "" {
c = append(c, []byte("")...)
}
@@ -537,12 +567,16 @@ func (li LineItem) webVTTBytes() (c []byte) {
}
func cssColor(rgb string) string {
+ // Lowercase hex RGB keys mapped to color names (the lookup lowercases its input); reference: https://www.w3.org/TR/webvtt1/#default-text-color
colors := map[string]string{
"#00ffff": "cyan", // narrator, thought
"#ffff00": "yellow", // out of vision
"#ff0000": "red", // noises
"#ff00ff": "magenta", // song
"#00ff00": "lime", // foreign speak
+ "#ffffff": "white",
+ "#0000ff": "blue",
+ "#000000": "black",
}
return colors[strings.ToLower(rgb)] // returning the empty string is ok
}