Robert
2024-05-13 23:03:24 -07:00
parent 3e12a4958a
commit b93248ea6e
2 changed files with 158 additions and 83 deletions

HF/app.py

@@ -17,6 +17,8 @@ import gradio as gr
import torch
import yt_dlp
+log_level = "INFO"
+logging.basicConfig(level=getattr(logging, log_level), format='%(asctime)s - %(levelname)s - %(message)s')
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
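
Note: the two added lines resolve a textual level name into a logging constant via getattr. A standalone sketch of the same pattern with a guard for invalid names (the LOG_LEVEL environment variable here is illustrative, not part of this app):

    import logging
    import os

    log_level = os.environ.get("LOG_LEVEL", "INFO").upper()
    # getattr() maps "INFO"/"DEBUG"/... to the logging constant; the third
    # argument is a fallback in case the name is misspelled.
    level = getattr(logging, log_level, logging.INFO)
    logging.basicConfig(level=level, format='%(asctime)s - %(levelname)s - %(message)s')
    logging.info("Logging initialized at %s", log_level)
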
#######
# Function Sections
@@ -123,6 +125,9 @@ processing_choice = config.get('Processing', 'processing_choice', fallback='cpu'
# Log file
# logging.basicConfig(filename='debug-runtime.log', encoding='utf-8', level=logging.DEBUG)
# API Key Shenanigans
+api_key = "UNSET"
#
#
#######################
@@ -318,7 +323,12 @@ def process_path(path):
""" Decides whether the path is a URL or a local file and processes accordingly. """
if path.startswith('http'):
logging.debug("file is a URL")
return get_youtube(path) # For YouTube URLs, modify to download and extract info
info_dict = get_youtube(path)
if info_dict:
return info_dict
else:
logging.error("Failed to get Video info")
return None
elif os.path.exists(path):
logging.debug("File is a path")
return process_local_file(path) # For local files, define a function to handle them
@@ -327,7 +337,7 @@ def process_path(path):
        return None
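
Note: process_path now propagates None instead of returning an unchecked get_youtube result. A minimal, self-contained sketch of the dispatch pattern (fetch_info and handle_local_file are hypothetical stand-ins for get_youtube and process_local_file):

    import logging
    import os

    def process_path_sketch(path, fetch_info, handle_local_file):
        """Route a URL to the downloader and an existing path to the local handler."""
        if path.startswith('http'):
            info_dict = fetch_info(path)
            if not info_dict:
                logging.error("Failed to get video info for %s", path)
                return None
            return info_dict
        if os.path.exists(path):
            return handle_local_file(path)
        logging.error("Neither a URL nor an existing file: %s", path)
        return None
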
# FIXME
#
def process_local_file(file_path):
    logging.info(f"Processing local file: {file_path}")
    title = normalize_title(os.path.splitext(os.path.basename(file_path))[0])
@@ -350,7 +360,7 @@ def process_local_file(file_path):
#
def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter,
-                download_video, download_audio):
+                download_video, download_audio, chunk_size):
    video_file_path = None
    try:
        results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers,
@@ -359,21 +369,30 @@ def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_nam
        if results:
            transcription_result = results[0]
            json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json')
+            prettified_json_file_path = transcription_result['audio_file'].replace('.wav', '.segments_pretty.json')
            summary_file_path = json_file_path.replace('.segments.json', '_summary.txt')
+            json_file_path = format_file_path(json_file_path)
+            prettified_json_file_path = format_file_path(prettified_json_file_path)
+            summary_file_path = format_file_path(summary_file_path)
            if download_video:
                video_file_path = transcription_result['video_path'] if 'video_path' in transcription_result else None
+            formatted_transcription = format_transcription(transcription_result)
+            summary_text = transcription_result.get('summary', 'Summary not available')
            if summary_file_path and os.path.exists(summary_file_path):
-                return transcription_result[
-                    'transcription'], "Summary available", json_file_path, summary_file_path, video_file_path
+                return formatted_transcription, summary_text, prettified_json_file_path, summary_file_path, video_file_path, None
            else:
-                return transcription_result[
-                    'transcription'], "Summary not available", json_file_path, None, video_file_path
+                return formatted_transcription, "Summary not available", prettified_json_file_path, None, video_file_path, None
        else:
-            return "No results found.", "Summary not available", None, None, None
+            return "No results found.", "Summary not available", None, None, None, None
    except Exception as e:
-        return str(e), "Error processing the request.", None, None, None
+        return str(e), "Error processing the request.", None, None, None, None
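
Note: every return path above now yields six values because the Gradio interface declares six output components; a tuple of the wrong length fails at event time. A sketch of that invariant under assumed names (run_pipeline is hypothetical):

    def process_url_sketch(url, run_pipeline):
        # Outputs, in order: transcription, summary/status, JSON file,
        # summary file, video file, audio file.
        try:
            results = run_pipeline(url)
            if not results:
                return "No results found.", "Summary not available", None, None, None, None
            r = results[0]
            return r['transcription'], r.get('summary', 'Summary not available'), None, None, None, None
        except Exception as e:
            return str(e), "Error processing the request.", None, None, None, None
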
def create_download_directory(title):
@@ -409,8 +428,13 @@ def get_youtube(video_url):
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        logging.debug("About to extract youtube info")
        info_dict = ydl.extract_info(video_url, download=False)
-        logging.debug("Youtube info successfully extracted")
-        return info_dict
+        logging.debug(f"Youtube info successfully extracted: {info_dict}")
+        if isinstance(info_dict, dict):
+            return info_dict
+        else:
+            logging.error("Invalid info_dict format")
+            return None
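
Note: a standalone sketch of the validated metadata fetch, using only documented yt_dlp options:

    import logging
    import yt_dlp

    def fetch_video_info(video_url):
        ydl_opts = {'quiet': True, 'skip_download': True}
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # download=False returns metadata only (title, formats, ...).
            info_dict = ydl.extract_info(video_url, download=False)
        if isinstance(info_dict, dict):
            return info_dict
        logging.error("Invalid info_dict format: %r", type(info_dict))
        return None
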
def get_playlist_videos(playlist_url):
@@ -442,7 +466,7 @@ def download_video(video_url, download_path, info_dict, download_video_flag):
logging.debug("About to normalize downloaded video title")
title = normalize_title(info_dict['title'])
if download_video_flag == False:
if not download_video_flag:
file_path = os.path.join(download_path, f"{title}.m4a")
ydl_opts = {
'format': 'bestaudio[ext=m4a]',
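
Note: per PEP 8, boolean flags are tested directly rather than compared to False:

    download_video_flag = False
    if not download_video_flag:   # idiomatic
        print("audio-only download")
    # rather than: if download_video_flag == False:
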
@@ -559,7 +583,6 @@ def convert_to_wav(video_file_path, offset=0):
        except Exception as e:
            logging.error("Error occurred - ffmpeg doesn't like windows")
            raise RuntimeError("ffmpeg failed")
-            exit()
    elif os.name == "posix":
        os.system(f'ffmpeg -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"')
    else:
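
Note: the exit() after raise was unreachable and is dropped. As a hedged aside (not what this commit does), the os.system calls could be expressed with subprocess.run, which sidesteps shell quoting of file paths and surfaces ffmpeg failures as exceptions:

    import subprocess

    def convert_to_wav_sketch(video_file_path, out_path):
        cmd = [
            'ffmpeg', '-ss', '00:00:00', '-i', video_file_path,
            '-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le', out_path,
        ]
        # check=True raises CalledProcessError on a nonzero ffmpeg exit code.
        subprocess.run(cmd, check=True)
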
@@ -1229,62 +1252,114 @@ def process_text(api_key, text_file):
return "Notice:", message
def format_file_path(file_path):
# Helper function to check file existence and return an appropriate path or message
return file_path if file_path and os.path.exists(file_path) else None
def update_visibility(mode):
if mode == "Advanced":
# Show all inputs below URL
return [gr.update(visible=True)] * 9
else:
# Hide all inputs below URL
return [gr.update(visible=False)] * 9
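
Note: update_visibility returns one gr.update(visible=...) per component, which Gradio applies positionally to the event's outputs. A self-contained sketch of the same show/hide wiring:

    import gradio as gr

    with gr.Blocks() as demo:
        mode = gr.Radio(choices=["Simple", "Advanced"], value="Simple", label="UI Mode")
        extras = [gr.Textbox(visible=False, label=f"Advanced option {i}") for i in range(3)]

        def set_visibility(m):
            # One update per output component, in order.
            return [gr.update(visible=(m == "Advanced"))] * len(extras)

        mode.change(fn=set_visibility, inputs=mode, outputs=extras)

    if __name__ == "__main__":
        demo.launch()
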
+# https://www.gradio.app/guides/controlling-layout
def launch_ui(demo_mode=False):
-    inputs = [
-        gr.components.Textbox(label="URL (Mandatory)",
-                              placeholder="Enter the video URL here"),
-        gr.components.Number(value=2,
-                             label="Number of Speakers(Optional - Currently has no effect)",
-                             visible=lambda x: x is not None),
-        gr.components.Dropdown(choices=whisper_models,
-                               value="small.en",
-                               label="Whisper Model(This is the ML model used for transcription.)",
-                               visible=lambda x: x is not None),
-        gr.components.Textbox(label="Custom Prompt (Customize your summary, or ask a different question)",
-                              placeholder="Q: As a professional summarizer, create a concise and comprehensive "
-                                          "summary of the provided text.\nA: Here is a detailed, bulleted list of the "
-                                          "key points made in the transcribed video and supporting arguments:",
-                              lines=3),
-        gr.components.Number(value=0,
-                             label="Offset (Seconds into the video to start transcribing at)",
-                             visible=lambda x: x is not None),
-        gr.components.Dropdown(
-            choices=["huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"],
-            label="API Name (Mandatory Unless you just want a Transcription)", visible=lambda x: x is not None),
-        gr.components.Textbox(label="API Key (Mandatory if API Name is specified)",
-                              placeholder="Enter your API key here", visible=lambda x: x is not None),
-        gr.components.Checkbox(label="VAD Filter(Can safely ignore)",
-                               value=False, visible=lambda x: x is not None),
-        gr.components.Checkbox(label="Download Video(Select to allow for file download of selected video)",
-                               value=False, visible=lambda x: x is not None),
-        gr.components.Checkbox(label="Download Audio(Select to allow for file download of selected Video's Audio)",
-                               value=False, visible=lambda x: x is not None)
-    ]
    whisper_models = ["small.en", "medium.en", "large"]
-    outputs = [
-        gr.components.Textbox(label="Transcription (Resulting Transcription from your input URL)"),
-        gr.components.Textbox(label="Summary or Status Message (Current status of Summary or Summary itself)"),
-        gr.components.File(label="Download Transcription as JSON (Download the Transcription as a file)",
-                           visible=lambda x: x is not None),
-        gr.components.File(label="Download Summary as Text (Download the Summary as a file)",
-                           visible=lambda x: x is not None),
-        gr.components.File(label="Download Video (Download the Video as a file)", visible=lambda x: x is not None),
-        gr.components.File(label="Download Audio (Download the Audio as a file)", visible=lambda x: x is not None)
-    ]
+    with gr.Blocks() as iface:
+        with gr.Tab("Audio Transcription + Summarization"):
+            with gr.Row():
+                # Light/Dark mode toggle switch
+                theme_toggle = gr.Radio(choices=["Light", "Dark"], value="Light",
+                                        label="Light/Dark Mode Toggle (Toggle to change UI color scheme)")
-    iface = gr.Interface(
-        fn=process_url,
-        inputs=inputs,
-        outputs=outputs,
-        title="Video Transcription and Summarization",
-        description="Submit a video URL for transcription and summarization. Ensure you input all necessary information including API keys."
-    )
+                # UI Mode toggle switch
+                ui_mode_toggle = gr.Radio(choices=["Simple", "Advanced"], value="Simple",
+                                          label="UI Mode (Toggle to show all options)")
-    iface.launch(share=False)
+            # URL input is always visible
+            url_input = gr.Textbox(label="URL (Mandatory)", placeholder="Enter the video URL here")
+            # Inputs to be shown or hidden
+            num_speakers_input = gr.Number(value=2, label="Number of Speakers(Optional - Currently has no effect)",
+                                           visible=False)
+            whisper_model_input = gr.Dropdown(choices=whisper_models, value="small.en",
+                                              label="Whisper Model(This is the ML model used for transcription.)",
+                                              visible=False)
+            custom_prompt_input = gr.Textbox(
+                label="Custom Prompt (Customize your summary, or ask a different question)",
+                placeholder="Q: As a professional summarizer, create a concise and comprehensive summary of the provided text.\nA: Here is a detailed, bulleted list of the key points made in the transcribed video and supporting arguments:",
+                lines=3, visible=True)
+            offset_input = gr.Number(value=0, label="Offset (Seconds into the video to start transcribing at)",
+                                     visible=False)
+            api_name_input = gr.Dropdown(
+                choices=[None, "huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], value=None,
+                label="API Name (Mandatory Unless you just want a Transcription)", visible=True)
+            api_key_input = gr.Textbox(label="API Key (Mandatory if API Name is specified)",
+                                       placeholder="Enter your API key here", visible=True)
+            vad_filter_input = gr.Checkbox(label="VAD Filter(Can safely ignore)", value=False, visible=False)
+            download_video_input = gr.Checkbox(
+                label="Download Video(Select to allow for file download of selected video)", value=False, visible=False)
+            download_audio_input = gr.Checkbox(
+                label="Download Audio(Select to allow for file download of selected Video's Audio)", value=False,
+                visible=True)
+            detail_level_input = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.1, interactive=False,
+                                           label="Detail Level (Slide me)", visible=True)
+            inputs = [num_speakers_input, whisper_model_input, custom_prompt_input, offset_input, api_name_input,
+                      api_key_input, vad_filter_input, download_video_input, download_audio_input, detail_level_input]
+            # Function to toggle Light/Dark Mode
+            def toggle_light(mode):
+                dark = (mode == "Dark")
+                return {"__theme": "dark" if dark else "light"}
+            # Set the event listener for the Light/Dark mode toggle switch
+            theme_toggle.change(fn=toggle_light, inputs=theme_toggle, outputs=None)
+            # Function to toggle visibility of advanced inputs
+            def toggle_ui(mode):
+                visible = (mode == "Advanced")
+                return [gr.update(visible=visible)] * len(inputs)
+            # Set the event listener for the UI Mode toggle switch
+            ui_mode_toggle.change(fn=toggle_ui, inputs=ui_mode_toggle, outputs=inputs)
+            # Combine URL input and inputs
+            all_inputs = [url_input] + inputs
+            outputs = [
+                gr.Textbox(label="Transcription (Resulting Transcription from your input URL)"),
+                gr.Textbox(label="Summary or Status Message (Current status of Summary or Summary itself)"),
+                gr.File(label="Download Transcription as JSON (Download the Transcription as a file)"),
+                gr.File(label="Download Summary as Text (Download the Summary as a file)"),
+                gr.File(label="Download Video (Download the Video as a file)"),
+                gr.File(label="Download Audio (Download the Audio as a file)")
+            ]
+            gr.Interface(
+                fn=process_url,
+                inputs=all_inputs,
+                outputs=outputs,
+                title="Video Transcription and Summarization",
+                description="Submit a video URL for transcription and summarization. Ensure you input all necessary information including API keys."
+            )
+        with gr.Tab("Transcription & Summarization History"):
+            image_input = gr.Image(label="Upload Image")
+            image_output = gr.Image(label="Processed Image")
+            with gr.Accordion("Open for More!", open=False):
+                gr.Markdown("Look at me...")
+                gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.1, interactive=True, label="Slide me")
+    iface.launch(share=False)
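
Note: launch_ui moves from a bare gr.Interface to a gr.Blocks layout with tabs; a minimal skeleton of that structure:

    import gradio as gr

    with gr.Blocks() as iface:
        with gr.Tab("Audio Transcription + Summarization"):
            url = gr.Textbox(label="URL (Mandatory)")
        with gr.Tab("Transcription & Summarization History"):
            gr.Markdown("History goes here.")

    if __name__ == "__main__":
        iface.launch(share=False)
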
#
#
@@ -1360,62 +1435,62 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
logging.debug(f"MAIN: Summarization being performed by {api_name}")
json_file_path = audio_file.replace('.wav', '.segments.json')
if api_name.lower() == 'openai':
api_key = openai_api_key
openai_api_key = api_key if api_key else config.get('API', 'openai_api_key', fallback=None)
try:
logging.debug(f"MAIN: trying to summarize with openAI")
summary = summarize_with_openai(api_key, json_file_path, openai_model, custom_prompt)
summary = summarize_with_openai(openai_api_key, json_file_path, openai_model, custom_prompt)
except requests.exceptions.ConnectionError:
requests.status_code = "Connection: "
elif api_name.lower() == "anthropic":
api_key = anthropic_api_key
anthropic_api_key = api_key if api_key else config.get('API', 'anthropic_api_key', fallback=None)
try:
logging.debug(f"MAIN: Trying to summarize with anthropic")
summary = summarize_with_claude(api_key, json_file_path, anthropic_model, custom_prompt)
summary = summarize_with_claude(anthropic_api_key, json_file_path, anthropic_model, custom_prompt)
except requests.exceptions.ConnectionError:
requests.status_code = "Connection: "
elif api_name.lower() == "cohere":
api_key = cohere_api_key
cohere_api_key = api_key if api_key else config.get('API', 'cohere_api_key', fallback=None)
try:
logging.debug(f"MAIN: Trying to summarize with cohere")
summary = summarize_with_cohere(api_key, json_file_path, cohere_model, custom_prompt)
summary = summarize_with_cohere(cohere_api_key, json_file_path, cohere_model, custom_prompt)
except requests.exceptions.ConnectionError:
requests.status_code = "Connection: "
elif api_name.lower() == "groq":
api_key = groq_api_key
groq_api_key = api_key if api_key else config.get('API', 'groq_api_key', fallback=None)
try:
logging.debug(f"MAIN: Trying to summarize with Groq")
summary = summarize_with_groq(api_key, json_file_path, groq_model, custom_prompt)
summary = summarize_with_groq(groq_api_key, json_file_path, groq_model, custom_prompt)
except requests.exceptions.ConnectionError:
requests.status_code = "Connection: "
elif api_name.lower() == "llama":
token = llama_api_key
llama_token = api_key if api_key else config.get('API', 'llama_api_key', fallback=None)
llama_ip = llama_api_IP
try:
logging.debug(f"MAIN: Trying to summarize with Llama.cpp")
summary = summarize_with_llama(llama_ip, json_file_path, token, custom_prompt)
summary = summarize_with_llama(llama_ip, json_file_path, llama_token, custom_prompt)
except requests.exceptions.ConnectionError:
requests.status_code = "Connection: "
elif api_name.lower() == "kobold":
token = kobold_api_key
kobold_token = api_key if api_key else config.get('API', 'kobold_api_key', fallback=None)
kobold_ip = kobold_api_IP
try:
logging.debug(f"MAIN: Trying to summarize with kobold.cpp")
summary = summarize_with_kobold(kobold_ip, json_file_path, custom_prompt)
summary = summarize_with_kobold(kobold_ip, json_file_path, kobold_token, custom_prompt)
except requests.exceptions.ConnectionError:
requests.status_code = "Connection: "
elif api_name.lower() == "ooba":
token = ooba_api_key
ooba_token = api_key if api_key else config.get('API', 'ooba_api_key', fallback=None)
ooba_ip = ooba_api_IP
try:
logging.debug(f"MAIN: Trying to summarize with oobabooga")
summary = summarize_with_oobabooga(ooba_ip, json_file_path, custom_prompt)
summary = summarize_with_oobabooga(ooba_ip, json_file_path, ooba_token, custom_prompt)
except requests.exceptions.ConnectionError:
requests.status_code = "Connection: "
elif api_name.lower() == "huggingface":
api_key = huggingface_api_key
huggingface_api_key = api_key if api_key else config.get('API', 'huggingface_api_key', fallback=None)
try:
logging.debug(f"MAIN: Trying to summarize with huggingface")
summarize_with_huggingface(api_key, json_file_path, custom_prompt)
summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt)
except requests.exceptions.ConnectionError:
requests.status_code = "Connection: "
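
Note: each branch now prefers the key supplied in the UI and falls back to config.ini. The repeated expression could be factored into a single helper; a sketch (resolve_api_key is hypothetical, not in the commit):

    from configparser import ConfigParser

    def resolve_api_key(user_key, config, option):
        """A UI-supplied key wins; otherwise read the [API] section (may be None)."""
        return user_key if user_key else config.get('API', option, fallback=None)

    # Usage mirroring the branches above:
    # openai_api_key = resolve_api_key(api_key, config, 'openai_api_key')
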
@@ -1434,10 +1509,10 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
        except Exception as e:
            logging.error(f"Error processing path: {path}")
            logging.error(str(e))
-    end_time = time.monotonic()
-    # print("Total program execution time: " + timedelta(seconds=end_time - start_time))
+    # end_time = time.monotonic()
+    # print("Total program execution time: " + timedelta(seconds=end_time - start_time))
-        return results
+    return results
if __name__ == "__main__":


@@ -1253,15 +1253,15 @@ def launch_ui(demo_mode=False):
        offset_input = gr.Number(value=0, label="Offset (Seconds into the video to start transcribing at)",
                                 visible=True)
        api_name_input = gr.Dropdown(
-            choices=["huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], value=None,
+            choices=[None, "huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], value=None,
            label="API Name (Mandatory Unless you just want a Transcription)", visible=True)
        api_key_input = gr.Textbox(label="API Key (Mandatory if API Name is specified)",
                                   placeholder="Enter your API key here", visible=True)
        vad_filter_input = gr.Checkbox(label="VAD Filter(Can safely ignore)", value=False, visible=True)
        download_video_input = gr.Checkbox(
-            label="Download Video(Select to allow for file download of selected video)", value=True, visible=True)
+            label="Download Video(Select to allow for file download of selected video)", value=False, visible=True)
        download_audio_input = gr.Checkbox(
-            label="Download Audio(Select to allow for file download of selected Video's Audio)", value=False,
+            label="Download Audio(Select to allow for file download of selected Video's Audio)", value=True,
            visible=True)
        detail_level_input = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.1, interactive=True,
                                       label="Detail Level (Slide me)", visible=True)