diff --git a/.gitignore b/.gitignore index 95fffd1..1858367 100644 Binary files a/.gitignore and b/.gitignore differ diff --git a/README.md b/README.md index f8d8c44..9b925f0 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Original: `YouTube contains an incredible amount of knowledge, much of which is * `python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s` - **Download Audio+Video from URL -> Transcribe audio from Video:** * `python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s` -- **Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` i.e. llama.cpp/`ooba`/`kobold`/`tabby`) API:** +- **Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` (llama.cpp)/`ooba` (oobabooga/text-gen-webui)/`kobold` (kobold.cpp)/`tabby` (Tabbyapi)) API:** * `python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s -api ` - **Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:** * `python summarize.py ./local/file_on_your/system --api_name ` @@ -25,8 +25,9 @@ Original: `YouTube contains an incredible amount of knowledge, much of which is - [Credits](#credits) ### What? -- Use the script to transcribe a local file or remote url. - * Any url youtube-dl supports _should_ work. +- Use the script to (download->)transcribe(->summarize) a local file or remote url. + * Any YouTube video. (For playlists, run `Get_Playlist_URLs.py` with the playlist URL; it creates a text file containing the URL of each video, which you can then pass as input so that all of them are downloaded. Pull requests are welcome.) + * Any url youtube-dl supports _should_ work. * If you pass an API name (anthropic/cohere/grok/openai/) as a second argument, and add your API key to the config file, you can have your resulting transcriptions summarized as well. 
def process_url(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, download_video_flag=False):
    """Run the pipeline for one input and shape the result for the Gradio UI.

    Calls ``main()`` with the given settings, takes the first entry of what it
    returns, and loads the segments JSON that sits next to that entry's audio
    file.

    Args:
        input_path: URL or path handed to ``main()`` unchanged.
        api_name: Name of the summarization API, or None.
        api_key: Key for that API, or None.
        num_speakers: Number of speakers; coerced to ``int`` before the call.
        whisper_model: Whisper model name handed to ``main()`` unchanged.
        offset: Offset in seconds; coerced to ``int`` before the call.
        vad_filter: Whether to enable the VAD filter.
        download_video_flag: Whether to download the video as well.

    Returns:
        A 4-tuple ``(json_data, summary, json_file_path, summary_file_path)``.
        ``summary_file_path`` is None when no such file exists on disk. When
        ``main()`` returns nothing the tuple is
        ``(None, "No results found.", None, None)``; when anything raises it is
        ``(None, "An error occurred: ...", None, None)``.
    """
    try:
        # UI number widgets may hand over floats or None; fall back to the
        # documented defaults and pass whole numbers on.
        num_speakers = 2 if num_speakers is None else int(num_speakers)
        offset = 0 if offset is None else int(offset)

        results = main(
            input_path,
            api_name=api_name,
            api_key=api_key,
            num_speakers=num_speakers,
            whisper_model=whisper_model,
            offset=offset,
            vad_filter=vad_filter,
            download_video_flag=download_video_flag,
        )

        if not results:
            return None, "No results found.", None, None

        # Only the first result is surfaced in the UI.
        transcription_result = results[0]
        json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json')
        with open(json_file_path, 'r') as file:
            json_data = json.load(file)

        summary = transcription_result.get('summary', '')

        # A summary file is not guaranteed to exist (e.g. nothing was
        # summarized); returning a missing path to a file output would fail.
        summary_file_path = json_file_path.replace('.segments.json', '_summary.txt')
        if not os.path.isfile(summary_file_path):
            summary_file_path = None

        return json_data, summary, json_file_path, summary_file_path
    except Exception as e:
        # UI boundary: report the failure to the user, but keep the traceback
        # in the log instead of discarding it.
        logging.exception("process_url failed for input %r", input_path)
        error_message = f"An error occurred: {str(e)}"
        return None, error_message, None, None
def launch_ui():
    """Build the Gradio interface around ``process_url`` and launch it.

    The form collects the same settings ``process_url`` accepts and shows the
    transcription text, the summary, and download links for the segments JSON
    and the summary file.
    """
    def process_transcription(json_data):
        """Join the "text" field of every segment into newline-separated text.

        Assumes the segments JSON is a list of objects with a "text" key.
        NOTE(review): confirm against the code that writes the file.
        """
        if not json_data:
            return ""
        return "\n".join(item["text"] for item in json_data)

    def run_pipeline(input_path, api_name, api_key, num_speakers, whisper_model, offset, vad_filter, download_video_flag):
        """Call ``process_url`` and convert its JSON payload to display text."""
        json_data, summary, json_file_path, summary_file_path = process_url(
            input_path,
            api_name=api_name,
            api_key=api_key,
            num_speakers=num_speakers,
            whisper_model=whisper_model,
            offset=offset,
            vad_filter=vad_filter,
            download_video_flag=download_video_flag,
        )
        # The first output is a textbox, so give it text rather than raw JSON.
        return process_transcription(json_data), summary, json_file_path, summary_file_path

    iface = gr.Interface(
        fn=run_pipeline,
        inputs=[
            gr.components.Textbox(label="URL"),
            # "tabby" is listed in the README alongside the other local backends.
            gr.components.Dropdown(choices=["openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba", "tabby"], label="API Name"),
            # Mask the key so it is not displayed in clear text.
            gr.components.Textbox(label="API Key", type="password"),
            # precision=0 keeps these two inputs whole numbers.
            gr.components.Number(value=2, precision=0, label="Number of Speakers"),
            gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model"),
            gr.components.Number(value=0, precision=0, label="Offset"),
            gr.components.Checkbox(value=False, label="VAD Filter"),
            gr.components.Checkbox(value=False, label="Download Video"),
        ],
        outputs=[
            gr.components.Textbox(label="Transcription", value=lambda: "", max_lines=10),
            gr.components.Textbox(label="Summary"),
            gr.components.File(label="Download Transcription JSON"),
            gr.components.File(label="Download Summary"),
        ],
        title="Video Transcription and Summarization",
        description="Submit a video URL for transcription and summarization.",
        allow_flagging="never",
    )
    iface.launch()
action=str, help='Where to save logfile (non-default)') args = parser.parse_args() - if args.input_path is None: - parser.print_help() - sys.exit(1) + if args.user_interface: + launch_ui() + else: + if args.input_path is None: + parser.print_help() + sys.exit(1) logging.basicConfig(level=getattr(logging, args.log_level), format='%(asctime)s - %(levelname)s - %(message)s')