-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathindex.py
88 lines (71 loc) · 3.72 KB
/
index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
from youtube_transcript_api import YouTubeTranscriptApi
from datetime import timedelta
import subprocess
import sys
from requests_html import HTMLSession
import pyperclip
# One HTTP session for the whole run; the extraction function below uses it
# to download the video page.
session = HTMLSession()

# Optional first command-line argument. Stays None when the script is started
# without arguments, in which case the URL is asked for interactively later.
data = None
if len(sys.argv) > 1:
    data = sys.argv[1]
def generate_end_time(start_time, duration):
    '''Return the offset at which a transcript entry ends.

    Args:
        start_time: Offset at which the entry starts.
        duration: Length of the entry, in the same unit as ``start_time``.

    Returns:
        ``start_time + duration``.
    '''
    end_time = start_time + duration
    return end_time
def generate_timestamp_yt_url(video_id, timestamp):
    '''Build a YouTube watch URL that starts playback at ``timestamp``.

    Args:
        video_id: YouTube video identifier placed in the ``v`` parameter.
        timestamp: Start offset in seconds. May be fractional; the fraction
            is dropped.

    Returns:
        A string of the form
        ``https://www.youtube.com/watch?v=<video_id>&t=<seconds>s``.
    '''
    # Callers pass raw caption offsets, which can be floats such as 12.34.
    # The ``t`` value is written as whole seconds, so truncate; rounding down
    # also means playback never starts after the caption has begun.
    whole_seconds = int(timestamp)
    return f'https://www.youtube.com/watch?v={video_id}&t={whole_seconds}s'
def convert_transcript_to_obsidian_format(video_id, transcript):
    '''Render transcript entries as Markdown lines with clickable timestamps.

    Every entry becomes one line of the form
    ``[HH:MM:SS](<link to start>) -> HH:MM:SS : <text>`` terminated by a
    newline, where the first clock value is the entry's start and the second
    is its end (start + duration).

    Args:
        video_id: YouTube video identifier used to build the timestamp links.
        transcript: Iterable of mappings, each providing the keys ``'start'``,
            ``'duration'`` and ``'text'``.

    Returns:
        All rendered lines concatenated into one string; ``''`` when
        ``transcript`` is empty.
    '''
    def as_clock(seconds):
        # str(timedelta) gives 'H:MM:SS'; left-pad with zeros to 'HH:MM:SS'.
        return '{:0>8}'.format(str(timedelta(seconds=round(seconds))))

    lines = []
    for entry in transcript:
        start_time = entry['start']
        end_time = generate_end_time(start_time, entry['duration'])
        start_time_url = generate_timestamp_yt_url(video_id, start_time)
        lines.append(
            f'[{as_clock(start_time)}]({start_time_url}) -> '
            f'{as_clock(end_time)} : {entry["text"]}\n'
        )
    # Join once instead of growing a string with += on every entry.
    return ''.join(lines)
def _scrape_channel_name(page_text, default='unknown_channel'):
    '''Pull the channel (author) name out of the text of a video page.

    Looks for the first ``author`` field that follows ``channelId`` in
    ``page_text`` and returns its value with double quotes removed. Returns
    ``default`` when the expected markers are not present.
    '''
    if 'channelId' not in page_text:
        return default
    try:
        name = (
            page_text.split('channelId')[1].strip()
            .split('author')[1].strip()
            .split(':')[1].strip()
            .split(',')[0].strip()
            .replace('"', '')
        )
    except IndexError:
        # 'channelId' was there but the 'author: <name>,' part was not.
        return default
    return name or default


def extract_id_channel_and_title_from_yt5_url(url):
    '''Resolve the video id, page title and channel name for a YouTube URL.

    Supported URL shapes:
      * watch URLs carrying the id in the ``v`` query parameter, optionally
        with an ``ab_channel`` parameter naming the channel;
      * ``youtu.be/<id>`` share links, with or without extra query parameters.

    The page is always downloaded (through the module-level ``session``) to
    read its ``<title>``. The channel name is taken from ``ab_channel`` when
    present and otherwise scraped from the page body; if it cannot be found
    there either, ``'unknown_channel'`` is used.

    Args:
        url: The YouTube URL to inspect.

    Returns:
        A tuple ``(v_id, title, channel)``. Spaces in ``channel`` are
        replaced by underscores.

    Raises:
        ValueError: If no video id can be found in ``url``.
    '''
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(url)
    query = parse_qs(parsed.query)
    host = parsed.hostname or ''

    if host.endswith('youtu.be'):
        # Share link: the id is the first path segment; the query string
        # (e.g. ?si=... or ?t=...) is not part of the id.
        v_id = parsed.path.strip('/').split('/')[0]
    else:
        v_id = query.get('v', [''])[0]
    if not v_id:
        raise ValueError(f'Could not find a video id in url: {url}')

    res = session.get(url)

    # Only trust the query string for the channel when it is explicitly the
    # ab_channel parameter; other parameters (t, list, ...) are unrelated.
    channel = query.get('ab_channel', [''])[0]
    if not channel:
        bodies = res.html.find('body')
        page_text = bodies[0].text if bodies else ''
        channel = _scrape_channel_name(page_text)

    title = res.html.xpath('.//title')[0].text
    channel = channel.replace(' ', '_')
    return v_id, title, channel
def generate_metadata(url, channel, title, video_id):
    '''Build the Markdown header placed above the transcript.

    The result consists of, in order: a level-1 heading with the title, a
    "Metadata" level-2 heading, an author line tagging the channel with a
    leading '#', a title line linking to ``url``, a fixed category tag line,
    and an embedded-player iframe pointing at ``video_id``.

    Args:
        url: Link target for the title line.
        channel: Channel name; emitted as the tag ``#<channel>``.
        title: Video title, used in the heading and as the link text.
        video_id: YouTube video identifier used in the iframe ``src``.

    Returns:
        The header as a single newline-separated string (no trailing newline).
    '''
    author_tag = '#' + channel
    player_iframe = (
        '<iframe width="560" height="315" '
        f'src="https://www.youtube.com/embed/{video_id}" '
        'title="YouTube video player" frameborder="0" '
        'allow="accelerometer; autoplay; clipboard-write; encrypted-media; '
        'gyroscope; picture-in-picture" allowfullscreen></iframe>'
    )
    header_lines = [
        f'# {title} ',  # the trailing space is part of the emitted heading
        '## Metadata',
        f'- Author: {author_tag}',
        f'- Title: [{title}]({url})',
        '- Category: #youtube_transcript',
        player_iframe,
    ]
    return '\n'.join(header_lines)
# Use the URL from the command line when one was given; otherwise prompt.
yt_url = data or input('Enter the youtube url: ')

# Reject empty input and anything longer than 150 characters.
if not 0 < len(yt_url) <= 150:
    raise Exception('Please enter a valid youtube url')

print('Extracting video information...')
v_id, title, channel = extract_id_channel_and_title_from_yt5_url(yt_url)
print(
    f'Video ID: {v_id}',
    f'Title: {title}',
    f'Channel: {channel}',
    sep='\n',
)

print('Generating metadata for Obsidian...')
metadata = generate_metadata(yt_url, channel, title, v_id)

print('Generating transcript...')
transcript = YouTubeTranscriptApi.get_transcript(v_id)

print('Converting transcript to obsidian format...')
transcript = convert_transcript_to_obsidian_format(v_id, transcript)

# Header and transcript go to the clipboard separated by three newlines.
print('Copying to clipboard...')
pyperclip.copy('\n\n\n'.join((metadata, transcript)))
print('Done copying to clipboard!')