visit
import re
def extract_youtube_video_id(url: str) -> str | None:
"""
Extract the video ID from the URL
//www.youtube.com/watch?v=XXX -> XXX
//youtu.be/XXX -> XXX
"""
found = re.search(r"(?:youtu\.be\/|watch\?v=)([\w-]+)", url)
if found:
return found.group(1)
return None
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
def get_video_transcript(video_id: str) -> str | None:
"""
Fetch the transcript of the provided YouTube video
"""
try:
transcript = YouTubeTranscriptApi.get_transcript(video_id)
except TranscriptsDisabled:
# The video doesn't have a transcript
return None
text = " ".join([line["text"] for line in transcript])
return text
def generate_summary(text: str) -> str:
    """
    Generate a summary of the provided text using the OpenAI API.

    Args:
        text: The text to summarize; sent as the user message.

    Returns:
        The content of the first completion choice, stripped of
        surrounding whitespace.

    Raises:
        KeyError: If the OPENAI_API_KEY environment variable is not set.
    """
    # Function-scope imports: neither module is imported in the visible part
    # of the file, so without these the function fails with NameError.
    import os

    import openai

    # Initialize the OpenAI API client
    openai.api_key = os.environ["OPENAI_API_KEY"]

    # The system message carries the instructions; the user message carries
    # the text to be summarized.
    instructions = "Please summarize the provided text"
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": instructions},
            {"role": "user", "content": text},
        ],
        temperature=0.2,
        n=1,
        max_tokens=200,
        presence_penalty=0,
        frequency_penalty=0.1,
    )

    # Only one choice is requested (n=1), so the summary is the first choice.
    return response.choices[0].message.content.strip()
This example uses gpt-3.5-turbo instead of text-davinci-003 because it performs similarly but is 10 times cheaper. When GPT-4 is publicly released, you can easily replace the model with a better one.
Each message has a role (either system, user, or assistant) and content (the text message itself). The first message with the role system should contain instructions for the AI to guide it.
def summarize_youtube_video(video_url: str) -> str:
    """
    Summarize the provided YouTube video.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The generated summary, or a human-readable error message when no
        video ID can be extracted from the URL or no transcript is found.
    """
    # Extract the video ID from the URL
    video_id = extract_youtube_video_id(video_url)

    # An unrecognised URL yields no ID; report that instead of passing the
    # missing ID on to the transcript fetch.
    if not video_id:
        return f"Could not extract a video ID from this URL: {video_url}"

    # Fetch the video transcript
    transcript = get_video_transcript(video_id)

    # If no transcript is found, return an error message
    if not transcript:
        return f"No English transcript found for this video: {video_url}"

    # Generate and return the summary
    return generate_summary(transcript)
if __name__ == "__main__":
    # Demo run: summarize one sample video and print the result.
    demo_url = "//www.youtube.com/watch?v=D1R-jKKp3NA"
    demo_summary = summarize_youtube_video(demo_url)
    print(demo_summary)
Here is an example of a video summary generated by GPT-3.5:
Steve Jobs gave a commencement speech at a university where he shared three stories from his life. The first story was about dropping out of college and how it led him to take a calligraphy class, which later influenced the design of the Macintosh computer. The second story was about getting fired from Apple, which allowed him to start over and create successful companies like Pixar. The third story was about his experience with cancer and how it taught him to live each day as if it were his last. He encouraged the graduates to find what they love, not settle, and to have the courage to follow their hearts and intuition. He ended his speech with the message "stay hungry, stay foolish."