Skip to content

Commit

Permalink
Preserve br tag in ttml
Browse files Browse the repository at this point in the history
  • Loading branch information
NhanNguyen700 committed May 31, 2024
1 parent 2d03157 commit e403fea
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 2 deletions.
34 changes: 34 additions & 0 deletions testdata/example-with-breaklines-in.ttml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<tt xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://www.w3.org/2006/10/ttaf1" xmlns:tt="http://www.w3.org/2006/10/ttaf1" xmlns:tts="http://www.w3.org/2006/10/ttaf1#styling" xmlns:ttp="http://www.w3.org/2006/10/ttaf1#parameter" xmlns:ttm="http://www.w3.org/2006/10/ttaf1#metadata" xml:lang="fr-FR" ttp:timeBase="smpte" ttp:frameRate="25" ttp:frameRateMultiplier="1:1" ttp:markerMode="discontinuous">
<head>
<metadata>
<ttm:title>Title test</ttm:title>
<ttm:copyright>Copyright test</ttm:copyright>
</metadata>
<styling>
<style xml:id="style_0" tts:fontFamily="sansSerif" tts:color="white" tts:fontStyle="normal" tts:textAlign="center" tts:origin="0% 90%" tts:extent="100% 10%"/>
</styling>
<layout>
<region xml:id="region_0" tt:style="style_0" tt:color="blue"/>
</layout>
</head>
<body>
<div>
<p xml:id="sub_1" begin="00:00:00.000" end="00:00:01.000" region="region_0" style="style_0" color="red">
<span style="style_0" color="black">First line<br/>Second line</span>
</p>
<p xml:id="sub_2" begin="00:00:01.000" end="00:00:02.000" region="region_0" style="style_0" color="red">
<span style="style_0" color="black">Third line<br></br>Fourth line</span>
</p>
<p xml:id="sub_3" begin="00:00:02.000" end="00:00:03.000" region="region_0">
Fifth line
<br/>
Sixth <span style="style_0" color="green">middle</span> line
</p>
<p xml:id="sub_4" begin="00:00:03.000" end="00:00:04.000" region="region_0">
Seventh line
<br></br>
Eighth <span style="style_0" color="green">middle</span> line
</p>
</div>
</body>
</tt>
42 changes: 42 additions & 0 deletions testdata/example-with-breaklines-out.ttml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<tt xmlns="http://www.w3.org/ns/ttml" xml:lang="fr" xmlns:ttm="http://www.w3.org/ns/ttml#metadata" xmlns:tts="http://www.w3.org/ns/ttml#styling">
<head>
<metadata>
<ttm:copyright>Copyright test</ttm:copyright>
<ttm:title>Title test</ttm:title>
</metadata>
<styling>
<style xml:id="style_0" tts:color="white" tts:extent="100% 10%" tts:fontFamily="sansSerif" tts:fontStyle="normal" tts:origin="0% 90%" tts:textAlign="center"></style>
</styling>
<layout>
<region xml:id="region_0" style="style_0" tts:color="blue"></region>
</layout>
</head>
<body>
<div>
<p begin="00:00:00.000" end="00:00:01.000" region="region_0" style="style_0" tts:color="red">
<span style="style_0" tts:color="black">First line</span>
<br></br>
<span style="style_0" tts:color="black">Second line</span>
</p>
<p begin="00:00:01.000" end="00:00:02.000" region="region_0" style="style_0" tts:color="red">
<span style="style_0" tts:color="black">Third line</span>
<br></br>
<span style="style_0" tts:color="black">Fourth line</span>
</p>
<p begin="00:00:02.000" end="00:00:03.000" region="region_0">
<span>Fifth line</span>
<br></br>
<span>Sixth </span>
<span style="style_0" tts:color="green">middle </span>
<span>line</span>
</p>
<p begin="00:00:03.000" end="00:00:04.000" region="region_0">
<span>Seventh line</span>
<br></br>
<span>Eighth </span>
<span style="style_0" tts:color="green">middle </span>
<span>line</span>
</p>
</div>
</body>
</tt>
36 changes: 35 additions & 1 deletion ttml.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,39 @@ func (i *TTMLInItems) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err
return nil
}

// handleBrTokenReader is an xml.TokenReader that makes "br" elements visible
// as text: it wraps an xml.Decoder and emits a newline character-data token in
// front of every "br" start element it reads.
// It is used only for decoding TTMLInItems, do not use it anywhere else.
type handleBrTokenReader struct {
	// Decoder is the underlying tokenizer. It is held by pointer so that the
	// decoder built by xml.NewDecoder is used as-is instead of being copied.
	*xml.Decoder
	// holdingToken is a "br" start element that has been read from the decoder
	// but not yet handed to the caller; it is nil when nothing is pending.
	holdingToken xml.Token
}

// Token implements the xml.TokenReader interface. When it meets a "br" start
// element it holds that token back and returns a newline as character data
// instead; the held start element is returned by the following call. This is
// to work around the fact that the go xml unmarshaler will ignore the "br" tag
// if it's within a character data field.
//
// Errors from the underlying decoder (including io.EOF at end of input) are
// returned unchanged together with a nil token.
func (r *handleBrTokenReader) Token() (xml.Token, error) {
	// A previous call replaced a "br" start element with a newline: deliver the
	// element itself now so the token stream stays well-formed.
	if held := r.holdingToken; held != nil {
		r.holdingToken = nil
		return held, nil
	}

	t, err := r.Decoder.Token()
	if err != nil {
		return nil, err
	}

	// The tag name is matched case-insensitively ("br", "BR", ...). EqualFold
	// avoids allocating a lowered copy of every element name.
	if se, ok := t.(xml.StartElement); ok && strings.EqualFold(se.Name.Local, "br") {
		r.holdingToken = t
		return xml.CharData("\n"), nil
	}

	return t, nil
}

// newHandleBrTokenReader returns an xml.TokenReader that tokenizes r and
// inserts a newline character-data token before each "br" start element.
func newHandleBrTokenReader(r io.Reader) xml.TokenReader {
	return &handleBrTokenReader{Decoder: xml.NewDecoder(r)}
}

// TTMLInItem represents an input TTML item
type TTMLInItem struct {
Style string `xml:"style,attr,omitempty"`
Expand Down Expand Up @@ -380,7 +413,8 @@ func ReadFromTTML(i io.Reader) (o *Subtitles, err error) {

// Unmarshal items
var items = TTMLInItems{}
if err = xml.Unmarshal([]byte("<span>"+ts.Items+"</span>"), &items); err != nil {
decoder := xml.NewTokenDecoder(newHandleBrTokenReader(strings.NewReader("<p>" + ts.Items + "</p>")))
if err = decoder.Decode(&items); err != nil {
err = fmt.Errorf("astisub: unmarshaling items failed: %w", err)
return
}
Expand Down
20 changes: 19 additions & 1 deletion ttml_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ package astisub_test

import (
"bytes"
"github.com/asticode/go-astikit"
"io/ioutil"
"strings"
"testing"

"github.com/asticode/go-astikit"

"github.com/asticode/go-astisub"
"github.com/stretchr/testify/assert"
)
Expand Down Expand Up @@ -50,3 +52,19 @@ func TestTTML(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, string(c), w.String())
}

// TestBreakLineHandling reads a TTML fixture containing <br/> and <br></br>
// elements, writes it back out as TTML and compares the result with the
// expected fixture, ignoring leading and trailing whitespace.
func TestBreakLineHandling(t *testing.T) {
	// Parse the input fixture
	subs, err := astisub.OpenFile("./testdata/example-with-breaklines-in.ttml")
	assert.NoError(t, err)

	// Serialize it back to TTML
	var out bytes.Buffer
	err = subs.WriteToTTML(&out)
	assert.NoError(t, err)

	// Load the expected output
	want, err := ioutil.ReadFile("./testdata/example-with-breaklines-out.ttml")
	assert.NoError(t, err)

	// Compare, tolerating differences in surrounding whitespace only
	expected := strings.TrimSpace(string(want))
	actual := strings.TrimSpace(out.String())
	assert.Equal(t, expected, actual)
}

0 comments on commit e403fea

Please sign in to comment.