Skip to content

Commit

Permalink
Setup parsing srt stylings for webvtt
Browse files Browse the repository at this point in the history
  • Loading branch information
justin-taylor committed Nov 14, 2024
1 parent 80e6dcf commit 0fd02a0
Show file tree
Hide file tree
Showing 3 changed files with 234 additions and 7 deletions.
157 changes: 154 additions & 3 deletions srt.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"strings"
"time"
"unicode/utf8"

"golang.org/x/net/html"
)

// Constants
Expand Down Expand Up @@ -116,7 +118,95 @@ func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {
o.Items = append(o.Items, s)
} else {
// Add text
s.Lines = append(s.Lines, Line{Items: []LineItem{{Text: strings.TrimSpace(line)}}})
if l := parseTextSrt(strings.TrimSpace(line)); len(l.Items) > 0 {
s.Lines = append(s.Lines, l)
}
}
}
return
}

// parseTextSrt parses one line of SRT cue text into a Line.
//
// The text is fed to an HTML tokenizer. <b>, <i>, <u> and <font> start/end
// tags toggle the current style state, and every text token that is non-empty
// after trimming becomes one LineItem carrying a snapshot of that state.
// InlineStyle stays nil when no style is active. An input that is blank after
// trimming yields a single item with empty text.
func parseTextSrt(i string) (o Line) {
// special handling needed for empty line
if strings.TrimSpace(i) == "" {
o.Items = []LineItem{{Text: ""}}
return
}

// Create tokenizer
tr := html.NewTokenizer(strings.NewReader(i))

// Style state that persists across tokens until the matching end tag is seen.
// NOTE(review): pos is never assigned anywhere in the visible code, so it is
// always 0 here and SRTPosition is never populated by this parser — confirm
// whether parsing of position markers is still to be added.
var (
bold bool
italic bool
underline bool
color *string
pos byte
)
for {
// Get next tag
t := tr.Next()

// Stop on any tokenizer error; presumably this is also how end of input is
// reported — confirm against the tokenizer documentation.
if err := tr.Err(); err != nil {
break
}

// Get unmodified text
raw := string(tr.Raw())
// Get current token
token := tr.Token()

switch t {
case html.EndTagToken:
// An end tag switches the matching style off; </font> clears the color.
switch token.Data {
case "b":
bold = false
case "i":
italic = false
case "u":
underline = false
case "font":
color = nil
}
case html.StartTagToken:
// A start tag switches the matching style on.
switch token.Data {
case "b":
bold = true
case "i":
italic = true
case "u":
underline = true
case "font":
// A <font> tag without a color attribute keeps the current color.
if c := htmlTokenAttribute(&token, "color"); c != nil {
color = c
}
}
case html.TextToken:
// The raw bytes are used rather than token.Data, and surrounding whitespace
// is trimmed; text runs that are blank after trimming are dropped.
if s := strings.TrimSpace(raw); s != "" {
// Only allocate style attributes when at least one style is active.
var sa *StyleAttributes
if bold || italic || underline || color != nil || pos != 0 {
sa = &StyleAttributes{
SRTBold: bold,
SRTColor: color,
SRTItalics: italic,
SRTPosition: pos,
SRTUnderline: underline,
}
sa.propagateSRTAttributes()
}

// Append item
o.Items = append(o.Items, LineItem{
InlineStyle: sa,
Text: s,
})
}
}
}
return
Expand Down Expand Up @@ -151,8 +241,7 @@ func (s Subtitles) WriteToSRT(o io.Writer) (err error) {

// Loop through lines
for _, l := range v.Lines {
c = append(c, []byte(l.String())...)
c = append(c, bytesLineSeparator...)
c = append(c, []byte(l.srtBytes())...)
}

// Add new line
Expand All @@ -169,3 +258,65 @@ func (s Subtitles) WriteToSRT(o io.Writer) (err error) {
}
return
}

// srtBytes renders the line as SRT: each item is serialized in order, items
// are joined by a single space, and the result ends with bytesLineSeparator.
func (l Line) srtBytes() (c []byte) {
	for pos := range l.Items {
		// Writing the separator before every item except the first ensures
		// the line never ends with a trailing space.
		if pos > 0 {
			c = append(c, ' ')
		}
		c = append(c, l.Items[pos].srtBytes()...)
	}
	return append(c, bytesLineSeparator...)
}

// srtBytes renders the item as SRT markup.
//
// Opening tags are written in the order font, b, i, u, followed by the
// {\anN} position marker when a position is set, then the text passed through
// escapeWebVTT, then the closing tags in reverse order. An item without an
// InlineStyle is rendered as its escaped text only.
func (li LineItem) srtBytes() (c []byte) {
	// Resolve the styling once; a nil InlineStyle leaves everything at zero.
	var (
		fontColor                string
		bold, italic, underlined bool
		position                 byte
	)
	if st := li.InlineStyle; st != nil {
		if st.SRTColor != nil {
			fontColor = *st.SRTColor
		}
		bold, italic, underlined = st.SRTBold, st.SRTItalics, st.SRTUnderline
		position = st.SRTPosition
	}

	// Opening tags, outermost first.
	if fontColor != "" {
		c = append(c, `<font color="`+fontColor+`">`...)
	}
	if bold {
		c = append(c, "<b>"...)
	}
	if italic {
		c = append(c, "<i>"...)
	}
	if underlined {
		c = append(c, "<u>"...)
	}

	// Position marker sits inside the tags, immediately before the text.
	if position != 0 {
		c = append(c, fmt.Sprintf(`{\an%d}`, position)...)
	}
	c = append(c, []byte(escapeWebVTT(li.Text))...)

	// Closing tags, innermost first.
	if underlined {
		c = append(c, "</u>"...)
	}
	if italic {
		c = append(c, "</i>"...)
	}
	if bold {
		c = append(c, "</b>"...)
	}
	if fontColor != "" {
		c = append(c, "</font>"...)
	}
	return c
}
80 changes: 79 additions & 1 deletion subtitles.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"time"

"github.com/asticode/go-astikit"
"golang.org/x/net/html"
)

// Bytes
Expand Down Expand Up @@ -173,6 +174,11 @@ var (

// StyleAttributes represents style attributes
type StyleAttributes struct {
SRTBold bool
SRTColor *string
SRTItalics bool
SRTPosition byte // 1-9 numpad layout
SRTUnderline bool
SSAAlignment *int
SSAAlphaLevel *float64
SSAAngle *float64 // degrees
Expand Down Expand Up @@ -236,6 +242,8 @@ type StyleAttributes struct {
TTMLWritingMode *string
TTMLZIndex *int
WebVTTAlign string
WebVTTBold bool
WebVTTItalics bool
WebVTTLine string
WebVTTLines int
WebVTTPosition string
Expand All @@ -244,6 +252,7 @@ type StyleAttributes struct {
WebVTTSize string
WebVTTStyles []string
WebVTTTags []WebVTTTag
WebVTTUnderline bool
WebVTTVertical string
WebVTTViewportAnchor string
WebVTTWidth string
Expand Down Expand Up @@ -279,6 +288,56 @@ func (t WebVTTTag) endTag() string {
return "</" + t.Name + ">"
}

// propagateSRTAttributes copies the SRT styling held by sa into the fields used
// by other formats: the color goes to TTMLColor, the numpad-style position is
// mapped to WebVTTAlign/WebVTTPosition, and bold/italics/underline are mirrored
// into the WebVTT flags. WebVTTTags is rebuilt from those flags.
func (sa *StyleAttributes) propagateSRTAttributes() {
	// TODO: handle non-default colors that need custom styles
	if c := sa.SRTColor; c != nil {
		sa.TTMLColor = c
	}

	// Column of the numpad layout: the left and right columns set an alignment,
	// the center column (and any unknown value) leaves it untouched.
	switch sa.SRTPosition {
	case 1, 4, 7:
		sa.WebVTTAlign = "left"
	case 3, 6, 9:
		sa.WebVTTAlign = "right"
	}

	// Row of the numpad layout: top, middle and bottom rows map to a percentage.
	switch sa.SRTPosition {
	case 7, 8, 9:
		sa.WebVTTPosition = "10%"
	case 4, 5, 6:
		sa.WebVTTPosition = "50%"
	case 1, 2, 3:
		sa.WebVTTPosition = "90%"
	}

	sa.WebVTTBold, sa.WebVTTItalics, sa.WebVTTUnderline = sa.SRTBold, sa.SRTItalics, sa.SRTUnderline

	// Rebuild the tag list from scratch, always leaving a non-nil slice, with
	// tags in the fixed order b, i, u.
	sa.WebVTTTags = make([]WebVTTTag, 0)
	for _, flag := range []struct {
		enabled bool
		name    string
	}{
		{sa.WebVTTBold, "b"},
		{sa.WebVTTItalics, "i"},
		{sa.WebVTTUnderline, "u"},
	} {
		if flag.enabled {
			sa.WebVTTTags = append(sa.WebVTTTags, WebVTTTag{Name: flag.name})
		}
	}
}

// propagateSSAAttributes is a no-op: its body is empty, so no SSA attribute is
// copied to the fields of any other format.
func (sa *StyleAttributes) propagateSSAAttributes() {}

func (sa *StyleAttributes) propagateSTLAttributes() {
Expand Down Expand Up @@ -352,7 +411,15 @@ func (sa *StyleAttributes) propagateTTMLAttributes() {
}
}

func (sa *StyleAttributes) propagateWebVTTAttributes() {}
// propagateWebVTTAttributes mirrors styling into the SRT fields: the color is
// taken from TTMLColor when one is set, and the bold/italics/underline flags
// are copied from their WebVTT counterparts.
func (sa *StyleAttributes) propagateWebVTTAttributes() {
	// An unset TTMLColor leaves any existing SRTColor untouched.
	if c := sa.TTMLColor; c != nil {
		sa.SRTColor = c
	}
	sa.SRTBold, sa.SRTItalics, sa.SRTUnderline = sa.WebVTTBold, sa.WebVTTItalics, sa.WebVTTUnderline
}

// Metadata represents metadata
// TODO Merge attributes
Expand Down Expand Up @@ -835,3 +902,14 @@ func appendStringToBytesWithNewLine(i []byte, s string) (o []byte) {
o = append(o, bytesLineSeparator...)
return
}

// htmlTokenAttribute returns a pointer to the value of the first attribute of t
// whose key equals key, or nil when t has no such attribute. The pointer refers
// to a copy of the value, not to the token's own attribute slice.
func htmlTokenAttribute(t *html.Token, key string) *string {
	for idx := range t.Attr {
		if t.Attr[idx].Key != key {
			continue
		}
		val := t.Attr[idx].Val
		return &val
	}
	return nil
}
4 changes: 1 addition & 3 deletions webvtt.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,11 @@ const (

// Vars
var (
bytesWebVTTItalicEndTag = []byte("</i>")
bytesWebVTTItalicStartTag = []byte("<i>")
bytesWebVTTTimeBoundariesSeparator = []byte(webvttTimeBoundariesSeparator)
webVTTRegexpInlineTimestamp = regexp.MustCompile(`<((?:\d{2,}:)?\d{2}:\d{2}\.\d{3})>`)
webVTTRegexpTag = regexp.MustCompile(`(</*\s*([^\.\s]+)(\.[^\s/]*)*\s*([^/]*)\s*/*>)`)
webVTTEscaper = strings.NewReplacer("&", "&amp;", "<", "&lt;")
webVTTUnescaper = strings.NewReplacer("&amp;", "&", "&lt;", "<")
webVTTUnescaper = strings.NewReplacer("&nbsp;", " ", "&amp;", "&", "&lt;", "<")
)

// parseDurationWebVTT parses a .vtt duration
Expand Down

0 comments on commit 0fd02a0

Please sign in to comment.