Skip to content

Commit

Permalink
Improved chapter extraction regex
Browse files Browse the repository at this point in the history
Now also supports timestamp lists in this format:
⭐️Time Stamps⭐️
#1   (00:00:00) CSS tutorial for beginners 🎨
#2   (00:11:00) fonts 🆒
#3   (00:14:20) borders 🔲
#4   (00:16:56) background 🌆
#5   (00:20:52) margins 📏
#6   (00:25:44) float 🎈
#7   (00:29:01) position 🎯
#8   (00:34:58) pseudo classes 👨‍👧‍👦
#9   (00:40:47) shadows 👥
#10 (00:43:43) icons 🏠
#11 (00:46:45) transform 🔄
#12 (00:50:54) animation 🎞️
  • Loading branch information
P4rthPat3l committed Nov 14, 2024
1 parent cfe414f commit 3ba02df
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 5 deletions.
2 changes: 1 addition & 1 deletion cmd/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ func NewVersion() *cobra.Command {
Use: "version",
Short: "Check the version info",
RunE: func(cmd *cobra.Command, args []string) error {
cmd.Printf("- YT Drop %s\n", "1.0.6")
cmd.Printf("- YT Drop %s\n", "1.0.9")
cmd.Printf("- os/type: %s\n", runtime.GOOS)
cmd.Printf("- os/arch: %s\n", runtime.GOARCH)
cmd.Printf("- go/version: %s\n", runtime.Version())
Expand Down
13 changes: 9 additions & 4 deletions pkg/FFMpeg/ffmpeg.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ package FFMpeg
import (
"fmt"
"regexp"
"sort"
"strconv"
"strings"
)

// Chapter represents a video chapter
type Chapter struct {
StartTime int64
EndTime int64
Expand Down Expand Up @@ -39,7 +39,8 @@ func parseTime(timeStr string) (int64, error) {

// ExtractChapters parses the input text and returns a slice of Chapter structs
func ExtractChapters(text string, videoDuration int64) ([]Chapter, error) {
pattern := regexp.MustCompile(`(?m)^\s*[^\w\n]*\(?(\d{1,2}:\d{2}(?::\d{2})?)\)?\s*[^\w\n]*\s*(.+)$`)
//pattern := regexp.MustCompile(`(?m)^\s*[^\w\n]*\(?(\d{1,2}:\d{2}(?::\d{2})?)\)?\s*[^\w\n]*\s*(.+)$`)
pattern := regexp.MustCompile(`(?m)^\s*#?\d*\s*[^\w\n]*\(?(\d{1,2}:\d{2}(?::\d{2})?)\)?\s*[^\w\n]*\s*(.+)$`)
matches := pattern.FindAllStringSubmatch(text, -1)

var chapters []Chapter
Expand All @@ -58,13 +59,17 @@ func ExtractChapters(text string, videoDuration int64) ([]Chapter, error) {
})
}

// Validate chapter order and assign end times
sort.Slice(chapters, func(i, j int) bool {
return chapters[i].StartTime < chapters[j].StartTime
})

// Validate chapter order and assign end times, exclude the last element
for i := 0; i < len(chapters)-1; i++ {
current := &chapters[i]
next := &chapters[i+1]

if next.StartTime < current.StartTime {
return nil, fmt.Errorf("invalid chapter timing: chapter %d starts at %dms but previous chapter ends at %dms", i+1, next.StartTime, current.StartTime)
return nil, fmt.Errorf("invalid chapter timing: chapter %d : %s starts at %dms but previous chapter ends at %dms", i+1, next.Title, next.StartTime, current.StartTime)
} else if next.StartTime == current.StartTime {
current.EndTime = next.StartTime
} else {
Expand Down
127 changes: 127 additions & 0 deletions pkg/FFMpeg/ffmpeg_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,133 @@ func TestExtractChapters2(t *testing.T) {
(2:09:17) Using useFetcher hook for form submission
(2:11:08) Throwing errors in Remix
(2:15:41) Closing thought and where to find hel`

videoDuration := int64(301043230) // Example video duration in milliseconds

chapters, err := ExtractChapters(text, videoDuration)
if err != nil {
t.Errorf("error: %s", err)
}

// Print chapters to verify output
for _, chapter := range chapters {
fmt.Printf("Start: %dms, End: %dms, Title: %s\n", chapter.StartTime, chapter.EndTime, chapter.Title)
}
}

// TestExtractChapters3 checks that ExtractChapters recognises chapter lines
// written as "#N (hh:mm:ss) title", i.e. an ordinal prefix followed by a
// parenthesised timestamp. The first line of the input is a heading without a
// timestamp and is not expected to produce a chapter.
func TestExtractChapters3(t *testing.T) {
	description := `⭐️ Contents ⭐️
#1 (00:00:00) CSS tutorial for beginners 🎨
#2 (00:11:00) fonts 🆒
#3 (00:14:20) borders 🔲
#4 (00:16:56) background 🌆
#5 (00:20:52) margins 📏
#6 (00:25:44) float 🎈
#7 (00:29:01) position 🎯
#8 (00:34:58) pseudo classes 👨‍👧‍👦
#9 (00:40:47) shadows 👥
#10 (00:43:43) icons 🏠
#11 (00:46:45) transform 🔄
#12 (00:50:54) animation 🎞️`

	// One chapter per "#N (hh:mm:ss) title" line above.
	const wantChapters = 12

	// Example video duration in milliseconds
	videoDuration := int64(301043230)

	chapters, err := ExtractChapters(description, videoDuration)
	if err != nil {
		// Without a chapter list there is nothing further to verify.
		t.Fatalf("error: %s", err)
	}

	if len(chapters) != wantChapters {
		t.Errorf("got %d chapters, want %d", len(chapters), wantChapters)
	}

	// ExtractChapters sorts its result by start time, so the returned slice
	// must be in non-decreasing StartTime order.
	for i := 1; i < len(chapters); i++ {
		if chapters[i].StartTime < chapters[i-1].StartTime {
			t.Errorf("chapter %d (%q) starts at %dms, before chapter %d at %dms",
				i, chapters[i].Title, chapters[i].StartTime, i-1, chapters[i-1].StartTime)
		}
	}

	// Log chapters so the parsed output is visible with -v or on failure.
	for _, chapter := range chapters {
		t.Logf("Start: %dms, End: %dms, Title: %s", chapter.StartTime, chapter.EndTime, chapter.Title)
	}
}

func TestExtractChaptersInError(t *testing.T) {
text := `✏️ Course developed by Andrew Brown of ExamPro. ‪@ExamProChannel‬
⭐️ Contents ⭐️
0:00:00 Introduction
0:34:47 Setup
0:52:38 Amazon S3
10:52:02 AWS API
12:19:52 VPC
0:34:47 Setup22222222222222
17:33:42 IAM
19:14:03 EC2
21:13:27 AMIs
21:37:10 ASG
21:50:14 ELB
21:57:20 Route53
22:19:29 AWS Global Accelerator
22:21:00 CloudFront
22:30:24 EBS
22:45:34 EFS
22:50:38 FSx
22:54:24 AWS Backup
22:56:29 AWS Snow Family
23:07:07 AWS Transfer Family
23:09:31 AWS Migration Hub
23:15:35 AWS Data Sync
23:24:17 DMS
23:59:42 AWS Auto Scaling
24:16:59 AWS Amplify
24:37:15 Amazon AppFlow
24:53:39 AppSync
25:18:48 AWS Batch
25:46:37 OpenSearch Service
26:09:43 Device Farm
26:22:11 QLDB
26:24:01 Elastic Transcoder
26:52:21 AWS MediaConvert
27:02:09 SNS
27:43:05 SQS
28:44:00 Amazon MQ
29:32:34 Service Catalog
29:40:04 CloudWatch and EventBridge
30:16:36 Lambda
31:49:51 AWS Step Functions
32:48:57 AWS Compute Optimizer
32:59:19 Elastic Beanstalk
34:32:38 Kinesis
34:59:52 ElastiCache
35:51:13 MemoryDB
36:21:52 CloudTrail
37:19:23 Redshift
37:37:50 Athena
37:53:46 ML Managed Services
40:43:04 AWS Data Exchange
40:47:11 AWS Glue
41:27:04 Lake Formation
41:29:41 API Gateway
41:44:09 RDS
42:56:19 Aurora
19:33:29 DocumentDB
44:29:11 DynamoDB
21:10:04 Amazon Keyspaces
45:17:30 Neptune
45:35:00 ECR
45:39:18 ECS
46:02:27 EKS Cloud
46:21:45 KMS
46:32:00 AWS Audit Manager
46:40:23 ACM
46:58:57 Cognito
47:08:33 Amazon Detective
47:16:42 AWS Directory Service
47:22:47 AWS Firewall Manager
47:29:18 AWS Inspector
47:39:57 Amazon Macie
47:49:00 AWS Security Hub
47:53:37 AWS Secrets Manager
48:35:40 AI Dev Tools
48:59:17 Amazon MSK
49:29:32 AWS Shield
49:33:29 AWS WAF
49:37:48 CloudHSM
49:41:59 AWS Guard Duty
49:46:10 Health Dashboards
49:47:42 AWS Artifact
49:50:33 Storage Gateway
50:10:55 EC2 Pricing Models`
videoDuration := int64(301043230) // Example video duration in milliseconds

chapters, err := ExtractChapters(text, videoDuration)
Expand Down

0 comments on commit 3ba02df

Please sign in to comment.