diff --git a/testdata/example-in-breaklines.ttml b/testdata/example-in-breaklines.ttml new file mode 100644 index 0000000..47de468 --- /dev/null +++ b/testdata/example-in-breaklines.ttml @@ -0,0 +1,24 @@ + + + + +
+

+ First line
+Second line
+

+

+ Third line

Fourth line
+

+

+ Fifth line +
+ Sixth middle line +

+

+ Seventh line +

Eighth middle line +

+
+ +
\ No newline at end of file diff --git a/testdata/example-out-breaklines.ttml b/testdata/example-out-breaklines.ttml new file mode 100644 index 0000000..2a7526d --- /dev/null +++ b/testdata/example-out-breaklines.ttml @@ -0,0 +1,32 @@ + + + + + + +
+

+ First line +

+ Second line +

+

+ Third lineFourth line +

+

+ Fifth line +

+ Sixth + middle + line +

+

+ Seventh line +

+ Eighth + middle + line +

+
+ +
\ No newline at end of file diff --git a/ttml.go b/ttml.go index 2e8046b..f86d329 100644 --- a/ttml.go +++ b/ttml.go @@ -197,6 +197,38 @@ func (i *TTMLInItems) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err return nil } +type ttmlXmlDecoder struct { + xml.Decoder + holdingToken xml.Token +} + +// Token implements the TokenReader interface, when it meets the "br" tag, it will hold the token and return a newline +// instead. This is to work around the fact that the go xml unmarshaler will ignore the "br" tag if it's within a +// character data field. +func (r *ttmlXmlDecoder) Token() (xml.Token, error) { + if r.holdingToken != nil { + returnToken := r.holdingToken + r.holdingToken = nil + return returnToken, nil + } + + t, err := r.Decoder.Token() + if err != nil { + return nil, err + } + + if se, ok := t.(xml.StartElement); ok && strings.ToLower(se.Name.Local) == "br" { + r.holdingToken = t + return xml.CharData("\n"), nil + } + + return t, nil +} + +func newTTMLXmlDecoder(ts TTMLInSubtitle) *ttmlXmlDecoder { + return &ttmlXmlDecoder{Decoder: *xml.NewDecoder(strings.NewReader("

" + ts.Items + "

")), holdingToken: nil} +} + // TTMLInItem represents an input TTML item type TTMLInItem struct { Style string `xml:"style,attr,omitempty"` @@ -380,7 +412,7 @@ func ReadFromTTML(i io.Reader) (o *Subtitles, err error) { // Unmarshal items var items = TTMLInItems{} - if err = xml.Unmarshal([]byte(""+ts.Items+""), &items); err != nil { + if err = newTTMLXmlDecoder(ts).Decode(&items); err != nil { err = fmt.Errorf("astisub: unmarshaling items failed: %w", err) return } diff --git a/ttml_test.go b/ttml_test.go index 990c821..bba9e36 100644 --- a/ttml_test.go +++ b/ttml_test.go @@ -2,10 +2,12 @@ package astisub_test import ( "bytes" - "github.com/asticode/go-astikit" "io/ioutil" + "strings" "testing" + "github.com/asticode/go-astikit" + "github.com/asticode/go-astisub" "github.com/stretchr/testify/assert" ) @@ -50,3 +52,19 @@ func TestTTML(t *testing.T) { assert.NoError(t, err) assert.Equal(t, string(c), w.String()) } + +func TestTTMLBreakLines(t *testing.T) { + // Open + s, err := astisub.OpenFile("./testdata/example-in-breaklines.ttml") + assert.NoError(t, err) + + // Write + w := &bytes.Buffer{} + err = s.WriteToTTML(w) + assert.NoError(t, err) + + c, err := ioutil.ReadFile("./testdata/example-out-breaklines.ttml") + assert.NoError(t, err) + + assert.Equal(t, strings.TrimSpace(string(c)), strings.TrimSpace(w.String())) +}