Add speaker support to WebVTT import/export

For example: 00:20.000 --> 00:24.000 <v Shawn>I work within the Web Accessibility Initiative, W-A-I, pronounced "way". 00:36.000 --> 00:38.000 <v.loud Mary Fisher>That's awesome!
dagronf · Oct 1, 2024 · 7cfb9ce · 7cfb9ce
1 parent 3aada02
commit 7cfb9ce
Show file tree

Hide file tree

Showing 5 changed files with 160 additions and 46 deletions.
diff --git a/Sources/SwiftSubtitles/coding/VTT.swift b/Sources/SwiftSubtitles/coding/VTT.swift
@@ -41,6 +41,8 @@ extension Subtitles.Coder {
 
 /// The time matching regex
 private let VTTTimeRegex__ = try! DSFRegex(#"(?:(\d*):)?(?:(\d*):)(\d*)[.,](\d{3})\s*-->\s*(?:(\d*):)?(?:(\d*):)(\d*)[.,](\d{3})"#)
+/// Regex for matching a WebVTT voice (speaker) tag, e.g. `<v.loud Esme>This is a test`
+///
+/// The single capture group is the speaker name (the tag's annotation).
+/// * The optional class part (`.loud`, `.first.loud`) may not contain `>`, so a match can never
+///   run past the end of an annotation-less tag (`<v.loud>`) and swallow following cue text.
+/// * The class part and the annotation may be separated by one or more spaces or tabs.
+///
+/// `try!` is intentional: the pattern is a compile-time constant, so a failure to build it
+/// is a programmer error.
+private let VTTSpeakerRegex__ = try! DSFRegex(#"<v(?:\.[^ \t>]*)?[ \t]+([^>]*)>"#)
 
 public extension Subtitles.Coder.VTT {
 	/// Encode subtitles as Data
@@ -77,6 +79,11 @@ public extension Subtitles.Coder.VTT {
 				e.hour, e.minute, e.second, e.millisecond
 			)
 
+			// If there's a speaker
+			if let sanitized = entry.speaker?.replacingCharacters(in: "<>", with: ".") {
+				result += "<v \(sanitized)>"
+			}
+
 			result += "\(entry.text)\n\n"
 		}
 
@@ -229,11 +236,28 @@ public extension Subtitles.Coder.VTT {
 				index += 1
 			}
 
+			// Check to see if the text contains a speaker tag. If so extract it
+			// If there are multiple speaker tags they are ignored
+			let matches = VTTSpeakerRegex__.matches(for: text)
+			var speaker: String?
+			if matches.count == 1 {
+				let captures = matches[0].captures
+				if captures.count == 1 {
+					// Grab the speaker
+					let r = captures[0]
+					speaker = String(text[r])
+
+					// Strip the speaker tag out of the text
+					text.removeSubrange(matches[0].range)
+				}
+			}
+
 			let entry = Subtitles.Cue(
 				identifier: identifier,
 				startTime: times!.0,
 				endTime: times!.1,
-				text: text
+				text: text,
+				speaker: speaker
 			)
 			results.append(entry)
 		}

diff --git a/Sources/SwiftSubtitles/private/String+extensions.swift b/Sources/SwiftSubtitles/private/String+extensions.swift
@@ -26,7 +26,7 @@
 
 import Foundation
 
-internal extension String {
+extension String {
 	/// Split the string into its component lines
 	///
 	/// Much more reliable than `content.components(separatedBy: .newlines)`
@@ -39,4 +39,23 @@ internal extension String {
 		}
 		return linesArray
 	}
+
+	/// Return a copy of this string in which every character found in `chars`
+	/// has been replaced with `replacement`
+	/// - Parameters:
+	///   - chars: The characters to replace. Each character in this string is matched individually
+	///   - replacement: The string substituted for each matching character (may be empty)
+	/// - Returns: A new String with the characters replaced
+	func replacingCharacters(in chars: String, with replacement: String) -> String {
+		var result = ""
+		// `reserveCapacity` is measured in UTF-8 code units, not in Characters
+		result.reserveCapacity(self.utf8.count)
+		for ch in self {
+			if chars.contains(ch) {
+				result.append(replacement)
+			}
+			else {
+				result.append(ch)
+			}
+		}
+		return result
+	}
 }
diff --git a/SwiftSubtitles.podspec b/SwiftSubtitles.podspec
@@ -1,6 +1,6 @@
 Pod::Spec.new do |s|
   s.name                 = "SwiftSubtitles"
-  s.version              = "1.6.0"
+  s.version              = "1.7.0"
   s.summary              = "A Swift package for reading/writing some common subtitle formats."
   s.description          = <<-DESC
     A Swift package for reading/writing subtitle formats (srt, sbv, sub, vtt, csv).

diff --git a/Tests/SwiftSubtitlesTests/VTTTests.swift b/Tests/SwiftSubtitlesTests/VTTTests.swift
@@ -136,7 +136,10 @@ WEBVTT
 		XCTAssertEqual(13, subtitles.cues.count)
 		XCTAssertEqual(Subtitles.Time(second: 11, millisecond: 0), subtitles.cues[0].startTime)
 		XCTAssertEqual(Subtitles.Time(second: 13, millisecond: 0), subtitles.cues[0].endTime)
-		XCTAssertEqual("<v Roger Bingham>We are in New York City", subtitles.cues[0].text)
+		XCTAssertEqual("We are in New York City", subtitles.cues[0].text)
+		XCTAssertEqual("Roger Bingham", subtitles.cues[0].speaker)
+		XCTAssertEqual("Didn’t we talk about enough in that conversation?", subtitles.cues[9].text)
+		XCTAssertEqual("Neil deGrasse Tyson", subtitles.cues[9].speaker)
 	}
 
 	func testMoreComplex() throws {
@@ -327,28 +330,28 @@ Never drink liquid nitrogen.
 		XCTAssertEqual(subtitles.cues[6].text, "UPC")
 	}
 
-    func testVTTCueWithEmptyPayload() throws {
-        // An empty payload is valid.
-        let vttSampleWithEmptyPayload = """
+	func testVTTCueWithEmptyPayload() throws {
+		// An empty payload is valid.
+		let vttSampleWithEmptyPayload = """
 WEBVTT
 
 00:01.000 --> 00:04.000
 
 """
-        let coder = Subtitles.Coder.VTT()
-        let subtitles = try coder.decode(vttSampleWithEmptyPayload)
-        XCTAssertEqual(1, subtitles.cues.count, "Expected 1 cue, got \(subtitles.cues.count)")
-    }
-
-    func testVTTWithCueTimingsButNoPayload() throws {
-        // Cue timings with no linefeed indicates no payload.
-        let vttWithoutPayload = "WEBVTT\n00:00:01.000 --> 00:00:04.000"
-        let coder = Subtitles.Coder.VTT()
-        let subtitles = try coder.decode(vttWithoutPayload)
-        XCTAssertEqual(0, subtitles.cues.count, "Expected 0 cues, got \(subtitles.cues.count)")
-        let text = subtitles.cues.first?.text
-        XCTAssertNil(text, "Expected nil, got \(text ?? "")")
-    }
+		let coder = Subtitles.Coder.VTT()
+		let subtitles = try coder.decode(vttSampleWithEmptyPayload)
+		XCTAssertEqual(1, subtitles.cues.count, "Expected 1 cue, got \(subtitles.cues.count)")
+	}
+
+	func testVTTWithCueTimingsButNoPayload() throws {
+		// Cue timings with no linefeed indicates no payload.
+		let vttWithoutPayload = "WEBVTT\n00:00:01.000 --> 00:00:04.000"
+		let coder = Subtitles.Coder.VTT()
+		let subtitles = try coder.decode(vttWithoutPayload)
+		XCTAssertEqual(0, subtitles.cues.count, "Expected 0 cues, got \(subtitles.cues.count)")
+		let text = subtitles.cues.first?.text
+		XCTAssertNil(text, "Expected nil, got \(text ?? "")")
+	}
 
     func testVTTWithEmptyAndMultilineCues() throws {
         // A newline must precede a cue, but an empty payload is valid.
@@ -364,34 +367,77 @@ WEBVTT
 00:00:09.000 --> 00:00:12.000
 
 """
-        let coder = Subtitles.Coder.VTT()
-        let subtitles = try coder.decode(sampleVTTContent)
-        XCTAssertEqual(3, subtitles.cues.count, "Expected 3 cues, got \(subtitles.cues.count)")
-        // Accessing the middle cue with multiline text.
-        let multilineText = subtitles.cues[1].text
-        let lines = multilineText.split(separator: "\n")
-        let numberOfLines = lines.count
-        XCTAssertEqual(2, numberOfLines, "Expected 2 lines, got \(numberOfLines)")
-    }
-
-    func testMissingNewlineTreatsTimeLineAsText() throws {
-        // An empty newline must precede a cue.
-        // A time line without one is considered text.
-        let vttWithoutNewlineBeforeSecondCue = """
+		 let coder = Subtitles.Coder.VTT()
+		 let subtitles = try coder.decode(sampleVTTContent)
+		 XCTAssertEqual(3, subtitles.cues.count, "Expected 3 cues, got \(subtitles.cues.count)")
+		 // Accessing the middle cue with multiline text.
+		 let multilineText = subtitles.cues[1].text
+		 let lines = multilineText.split(separator: "\n")
+		 let numberOfLines = lines.count
+		 XCTAssertEqual(2, numberOfLines, "Expected 2 lines, got \(numberOfLines)")
+	 }
+
+	func testMissingNewlineTreatsTimeLineAsText() throws {
+		// An empty newline must precede a cue.
+		// A time line without one is considered text.
+		let vttWithoutNewlineBeforeSecondCue = """
 WEBVTT
 
 00:00:01.000 --> 00:00:04.000
 00:00:05.000 --> 00:00:08.000
 """
-        let coder = Subtitles.Coder.VTT()
-        let subtitles = try coder.decode(vttWithoutNewlineBeforeSecondCue)
-        XCTAssertEqual(1, subtitles.cues.count, "Expected 1 cue, got \(subtitles.cues.count)")
-        let startTimeSecond = subtitles.cues.first?.startTime.second
-        let endTimeSecond = subtitles.cues.first?.endTime.second
-        let text = subtitles.cues.first?.text
-        XCTAssertEqual(startTimeSecond, 1, "Expected 1 second, got \(startTimeSecond ?? 0)")
-        XCTAssertEqual(endTimeSecond, 4, "Expected 4 seconds, got \(endTimeSecond ?? 0)")
-        let expectedText = "00:00:05.000 --> 00:00:08.000"
-        XCTAssertEqual(text, expectedText, "Expected \(expectedText), got \(text ?? "")")
-    }
+		let coder = Subtitles.Coder.VTT()
+		let subtitles = try coder.decode(vttWithoutNewlineBeforeSecondCue)
+		XCTAssertEqual(1, subtitles.cues.count, "Expected 1 cue, got \(subtitles.cues.count)")
+		let startTimeSecond = subtitles.cues.first?.startTime.second
+		let endTimeSecond = subtitles.cues.first?.endTime.second
+		let text = subtitles.cues.first?.text
+		XCTAssertEqual(startTimeSecond, 1, "Expected 1 second, got \(startTimeSecond ?? 0)")
+		XCTAssertEqual(endTimeSecond, 4, "Expected 4 seconds, got \(endTimeSecond ?? 0)")
+		let expectedText = "00:00:05.000 --> 00:00:08.000"
+		XCTAssertEqual(text, expectedText, "Expected \(expectedText), got \(text ?? "")")
+	}
+
+	/// Decode `speakers1.vtt` and check that each cue's voice tag is extracted into
+	/// `speaker` and removed from `text`
+	func testBasicSpeakers1() throws {
+		// Fail the test (rather than trap) if the resource is missing from the test bundle
+		let fileURL = try XCTUnwrap(Bundle.module.url(forResource: "speakers1", withExtension: "vtt"))
+		let subtitles = try Subtitles(fileURL: fileURL, encoding: .utf8)
+
+		// The indexed accesses below would trap on a shorter array, so stop on a count mismatch
+		guard subtitles.cues.count == 8 else {
+			XCTFail("Expected 8 cues, got \(subtitles.cues.count)")
+			return
+		}
+
+		XCTAssertEqual("Rajwinder Kaur", subtitles.cues[0].speaker)
+		XCTAssertEqual("Shawn Henry", subtitles.cues[1].speaker)
+		// A stray '>' later in the cue text must not be treated as part of the voice tag
+		XCTAssertEqual("Thank you >for this opportunity to share information about accessibility.", subtitles.cues[1].text)
+		XCTAssertEqual("Rajwinder", subtitles.cues[2].speaker)
+		XCTAssertEqual("Shawn", subtitles.cues[3].speaker)
+		XCTAssertEqual("Shawn Fisher", subtitles.cues[4].speaker)
+		// Voice tags carrying classes (`<v.loud …>`)
+		XCTAssertEqual("Mary Fisher", subtitles.cues[5].speaker)
+		XCTAssertEqual("Jasper Mc.Donald", subtitles.cues[6].speaker)
+		XCTAssertEqual("That's awesome!", subtitles.cues[6].text)
+		// A voice tag that is not at the start of the cue text; other markup is left in place
+		XCTAssertEqual("Esme Mc.Donald", subtitles.cues[7].speaker)
+		XCTAssertEqual("<i>laughter</i>It's a blue apple tree", subtitles.cues[7].text)
+	}
+
+	/// Encode cues that have speakers and check that each is written as a `<v …>` tag,
+	/// with any `<` or `>` in the speaker name replaced by `.`
+	func testBasicExportWithSpeakers() throws {
+		let subtitles = Subtitles([
+			Subtitles.Cue(startTime: 10, duration: 0.25, text: "hi there", speaker: "Henry McDonald"),
+			Subtitles.Cue(startTime: 10.25, duration: 0.25, text: "and yo to you!", speaker: "Emse<Fisher"),
+			Subtitles.Cue(startTime: 11.00, duration: 0.25, text: "Jingle jangle <i>laughter</i> goes the bell", speaker: "Jonathan<Gupppy>")
+		])
+		let content = try Subtitles.Coder.VTT().encode(subtitles: subtitles)
+		XCTAssertFalse(content.isEmpty)
+		XCTAssertTrue(content.contains("<v Henry McDonald>"))
+		// Angle brackets in the speaker name would break the tag, so they are exported as '.'
+		XCTAssertTrue(content.contains("<v Emse.Fisher>"))
+		// Markup inside the cue text itself must be left untouched
+		XCTAssertTrue(content.contains("<v Jonathan.Gupppy.>Jingle jangle <i>laughter</i> goes the bell"))
+	}
+
+	/// Encode cues that have no speaker and check that no voice tag is written
+	func testBasicExportWithoutSpeakers() throws {
+		let subtitles = Subtitles([
+			Subtitles.Cue(startTime: 10, duration: 0.25, text: "hi there"),
+			Subtitles.Cue(startTime: 10.25, duration: 0.25, text: "and yo to you!")
+		])
+		let content = try Subtitles.Coder.VTT().encode(subtitles: subtitles)
+		XCTAssertFalse(content.isEmpty)
+		XCTAssertFalse(content.contains("<v"))  // Should be no <v tags
+	}
 }
diff --git a/Tests/SwiftSubtitlesTests/resources/vtt/speakers1.vtt b/Tests/SwiftSubtitlesTests/resources/vtt/speakers1.vtt
@@ -0,0 +1,25 @@
+WEBVTT
+
+00:11.000 --> 00:13.000
+<v Rajwinder Kaur>Welcome to the podcast.
+
+00:13.000 --> 00:17.000
+<v Shawn Henry>Thank you >for this opportunity to share information about accessibility.
+
+00:17.000 --> 00:20.000
+<v Rajwinder>Would you start by telling us a little about your role at W3C?
+
+00:20.000 --> 00:24.000
+<v Shawn>I work within the Web Accessibility Initiative, W-A-I, pronounced "way".
+
+00:28.000 --> 00:30.000
+<b noodle><v Shawn Fisher>I work within the Web Accessibility Initiative, W-A-I, pronounced "way".
+
+00:36.000 --> 00:38.000
+<v.loud Mary Fisher>That's awesome!
+
+00:46.000 --> 00:48.000
+<v.loud Jasper Mc.Donald>That's awesome!
+
+00:50.000 --> 00:52.001
+<i>laughter</i><v.first.loud Esme Mc.Donald>It's a blue apple tree