Hey, we got a UI now

Got a Gradio UI, thanks Opus. It's partially broken, but it's something.
2026-03-10 08:51:17 +00:00 · 2024-05-09 00:09:53 -07:00
parent 0aa1d9d4ac
commit 8f4f5be11f
3 changed files with 78 additions and 7 deletions
--- a/.gitignore
+++ b/.gitignore
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ Original: `YouTube contains an incredible amount of knowledge, much of which is
  * `python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s`
 - **Download Audio+Video from URL -> Transcribe audio from Video:**
  * `python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s`
- **Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` i.e. llama.cpp/`ooba`/`kobold`/`tabby`) API:**
+- **Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` (llama.cpp)/`ooba` (oobabooga/text-gen-webui)/`kobold` (kobold.cpp)/`tabby` (Tabbyapi)) API:**
  * `python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s -api <your choice of API>`
 - **Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:**
  * `python summarize.py ./local/file_on_your/system --api_name <API_name>`
@@ -25,8 +25,9 @@ Original: `YouTube contains an incredible amount of knowledge, much of which is
 - [Credits](#credits)

 ### <a name="what"></a>What?
- Use the script to transcribe a local file or remote url. 
-  * Any url youtube-dl supports _should_ work.
+- Use the script to (download->)transcribe(->summarize) a local file or remote url. 
+  * Any YouTube video. (For playlists, run `Get_Playlist_URLs.py <Playlist URL>`; it creates a text file containing the URL of each video in the playlist, which you can then pass as input so that they are all downloaded. Pull requests are welcome.)
+    * Any url youtube-dl supports _should_ work.
  * If you pass an API name (anthropic/cohere/groq/openai) as a second argument, and add your API key to the config file, you can have your resulting transcriptions summarized as well. 
    * Alternatively, you can pass `llama`/`ooba`/`kobold`/`tabby` and have the script perform a request to your local API endpoint for summarization. You will need to modify the `llama_api_IP` value in the `config.txt` to reflect the `IP:Port` of your local server.
    * Or pass the `--api_url` argument with the `IP:Port` to avoid making changes to the `config.txt` file.
--- a/summarize.py
+++ b/summarize.py
@@ -1,9 +1,10 @@
 #!/usr/bin/env python3
+import gradio as gr
 import argparse, configparser, datetime, json, logging, os, platform, requests, shutil, subprocess, sys, time, unicodedata
 import zipfile
 from datetime import datetime
 import contextlib
-import ffmpeg # Used for issuing commands to underlying ffmpeg executable, pip package ffmpeg is from 2018
+import ffmpeg
 import torch
 import yt_dlp

@@ -1101,6 +1102,71 @@ def save_summary_to_file(summary, file_path):



+####################################################################################################################################
+# Gradio UI
+#
+
+def process_url(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, download_video_flag=False):
+    """Run main() for one input and shape its first result for the Gradio UI.
+
+    All arguments are forwarded unchanged to main(). Only the first entry of
+    the returned results is used; any further entries are ignored.
+
+    Returns a 4-tuple matching the four UI outputs:
+        (json_data, summary, json_file_path, summary_file_path)
+    json_data is the parsed content of the "<audio>.segments.json" file that
+    sits next to the result's 'audio_file'. summary_file_path is None when no
+    "<audio>_summary.txt" file exists on disk.
+
+    Never raises: on any failure, or when main() returns nothing, the summary
+    slot carries a human-readable message and the other three slots are None.
+    """
+    try:
+        results = main(input_path, api_name=api_name, api_key=api_key, num_speakers=num_speakers, whisper_model=whisper_model, offset=offset, vad_filter=vad_filter, download_video_flag=download_video_flag)
+
+        if results:
+            transcription_result = results[0]
+            # NOTE(review): assumes the segments file was written next to the
+            # audio file with this naming scheme -- confirm against main().
+            json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json')
+            with open(json_file_path, 'r') as file:
+                json_data = json.load(file)
+
+            summary = transcription_result.get('summary', '')
+
+            # Only offer the summary download when the file is really there;
+            # a path that does not exist cannot be served by the File output.
+            summary_file_path = json_file_path.replace('.segments.json', '_summary.txt')
+            if not os.path.isfile(summary_file_path):
+                summary_file_path = None
+
+            return json_data, summary, json_file_path, summary_file_path
+        else:
+            return None, "No results found.", None, None
+    except Exception as e:
+        # UI boundary: keep the traceback in the log, since the user only
+        # sees the short message below.
+        logging.exception("process_url failed for input %r", input_path)
+        error_message = f"An error occurred: {str(e)}"
+        return None, error_message, None, None
+
+
+
+def launch_ui():
+    """Build the Gradio interface around process_url and launch it.
+
+    The eight input components are listed in the same order as process_url's
+    parameters, and the four output components in the same order as the tuple
+    it returns.
+    """
+    # NOTE(review): this helper is not wired to any component yet, so the
+    # "Transcription" box receives the raw segments data from process_url.
+    # It assumes every segment is a mapping with a "text" key -- confirm
+    # against the segments JSON before hooking it up.
+    def process_transcription(json_data):
+        if json_data:
+            return "\n".join([item["text"] for item in json_data])
+        else:
+            return ""
+
+    iface = gr.Interface(
+        fn=process_url,
+        inputs=[
+            gr.components.Textbox(label="URL"),
+            gr.components.Dropdown(choices=["openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], label="API Name"),
+            gr.components.Textbox(label="API Key"),
+            # precision=0 makes Gradio hand back an int rather than a float,
+            # matching the integer values the command line supplies.
+            gr.components.Number(value=2, precision=0, label="Number of Speakers"),
+            gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model"),
+            gr.components.Number(value=0, precision=0, label="Offset"),
+            gr.components.Checkbox(value=False, label="VAD Filter"),
+            gr.components.Checkbox(value=False, label="Download Video")
+        ],
+        outputs=[
+            gr.components.Textbox(label="Transcription", value=lambda: "", max_lines=10),
+            gr.components.Textbox(label="Summary"),
+            gr.components.File(label="Download Transcription JSON"),
+            gr.components.File(label="Download Summary")
+        ],
+        title="Video Transcription and Summarization",
+        description="Submit a video URL for transcription and summarization.",
+        allow_flagging="never"
+    )
+    iface.launch()
+
+#
+#
+####################################################################################################################################
+
+
+
+
+
+

 ####################################################################################################################################
 # Main()
@@ -1238,12 +1304,16 @@ if __name__ == "__main__":
    parser.add_argument('-off', '--offset', type=int, default=0, help='Offset in seconds (default: 0)')
    parser.add_argument('-vad', '--vad_filter', action='store_true', help='Enable VAD filter')
    parser.add_argument('-log', '--log_level', type=str, default='INFO', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Log level (default: INFO)')
+    parser.add_argument('-ui', '--user_interface', action='store_true', help='Launch the Gradio user interface')
    #parser.add_argument('--log_file', action=str, help='Where to save logfile (non-default)')
    args = parser.parse_args()

-    if args.input_path is None:
-        parser.print_help()
-        sys.exit(1)
+    if args.user_interface:
+        launch_ui()
+    else:
+        if args.input_path is None:
+            parser.print_help()
+            sys.exit(1)

    logging.basicConfig(level=getattr(logging, args.log_level), format='%(asctime)s - %(levelname)s - %(message)s')