Inspired by anvanvan's request, I added a little script to my workflow that adds a timestamp to anything I highlight in a YouTube video’s transcript in Readwise Reader.
In case this is useful for some of you, here you go:
from youtube_transcript_api import YouTubeTranscriptApi as yta
import re
def format_yt_timestamp(video_id, time):
    """Render transcript offsets as Markdown links into a YouTube video.

    Args:
        video_id: YouTube video identifier, placed in the ``v`` query
            parameter of the generated URL.
        time: Iterable of non-negative offsets in seconds (ints or floats).
            Fractional seconds are truncated.

    Returns:
        A list with one ``[HH:MM:SS](url)`` string per offset, in the same
        order as ``time``. The URL's ``t`` parameter uses the
        ``HHhMMmSSs`` form.
    """
    formatted = []
    for offset in time:
        # Truncate to whole seconds once, then split with divmod; this also
        # avoids a local called `min`, which would shadow the builtin.
        minutes, seconds = divmod(int(offset), 60)
        hours, minutes = divmod(minutes, 60)
        label = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
        ts_url = f"{hours:02d}h{minutes:02d}m{seconds:02d}s"
        url = f"https://www.youtube.com/watch?v={video_id}&t={ts_url}"
        formatted.append(f"[{label}]({url})")
    return formatted
def normalize(str):
    """Collapse every run of whitespace into a single space.

    Leading and trailing whitespace is dropped, and newlines and tabs count
    as whitespace, so text that is wrapped differently compares equal after
    normalization.

    Args:
        str: The text to normalize. The name shadows the builtin; it is kept
            so that existing keyword callers continue to work.

    Returns:
        The text with words separated by exactly one space.
    """
    # No separate newline replacement is needed: split() with no arguments
    # already splits on any whitespace, newlines included.
    return ' '.join(str.split())
def get_timestamps(video_id, search_word):
    """Find where a highlighted passage occurs in a video's transcript.

    The first transcript listed for the video is scanned from its last line
    towards its first. Lines are prepended to a growing window of text; as
    soon as the window contains the highlight, the start time of the line
    just added is recorded (that is the line on which the passage begins)
    and the window is cleared so that earlier occurrences can be found too.

    Args:
        video_id: YouTube video identifier.
        search_word: The highlighted text; may span several transcript lines.
            Whitespace differences are ignored.

    Returns:
        Markdown timestamp links as produced by ``format_yt_timestamp``,
        latest occurrence first (a consequence of the backwards scan). An
        empty list if the highlight is blank, no transcript is listed, or
        the passage is not found.
    """
    search_word = normalize(search_word)
    if not search_word:
        # An empty needle is contained in every string and would otherwise
        # produce a timestamp for every single transcript line.
        return []

    # Only the first listed transcript is used, so fetch just that one
    # instead of downloading all of them.
    first_transcript = next(iter(yta.list_transcripts(video_id)), None)
    if first_transcript is None:
        return []
    # Assumes each entry supports ['text'] and ['start'] lookups, as the
    # original code did -- confirm against the installed library version.
    entries = list(first_transcript.fetch())

    window = ''
    matches = []
    for entry in reversed(entries):
        window = normalize(entry['text']) + ' ' + window
        if search_word in window:
            matches.append(entry['start'])
            window = ''
    return format_yt_timestamp(video_id, matches)
def _extract_video_id(url):
    """Return the YouTube video id found in ``url``, or '' if there is none."""
    # Video ids may contain '-' and '_' in addition to letters and digits;
    # leaving those out of the character class truncates the id.
    patterns = (
        r"youtube\.com\/watch.+v=([a-zA-Z0-9_-]+)",
        r"youtu\.be\/([a-zA-Z0-9_-]+)",
    )
    video_id = ''
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            # A later pattern overrides an earlier one, so a youtu.be match
            # takes precedence when both are present.
            video_id = match.group(1)
    return video_id


def handler(pd: "pipedream"):
    """Pipedream step: build timestamp links for a highlight in a YouTube video.

    Reads the return value of the ``book`` step (its ``source_url``) and the
    trigger event (its ``text``). When the source URL is a YouTube watch or
    youtu.be link, the highlight is looked up in the video's transcript.

    Args:
        pd: The Pipedream context object; only ``pd.steps`` is read.

    Returns:
        A dict with the key ``"youtube"``. Its value is a string holding one
        Markdown timestamp link per match, each preceded by a newline, or an
        empty string when the source is not a YouTube video or nothing matched.
    """
    # Prepare inputs
    book = pd.steps["book"]["$return_value"]
    highlight = pd.steps["trigger"]["event"]

    # A missing/None source URL simply means "not a YouTube video".
    url = book["source_url"] or ''
    text = highlight["text"]

    format_timestamp = ''
    video_id = _extract_video_id(url)
    if video_id:
        timestamps = get_timestamps(video_id, text)
        # Every link is prefixed with a newline, including the first one.
        format_timestamp = ''.join("\n" + ts for ts in timestamps)

    # Return data for use in future steps
    return {
        "youtube": format_timestamp,
    }
After adding a new Python step named timestamp
with this code, you can use the variable
{{steps.timestamp["$return_value"].youtube}}
in the markup of the final step.