From b93248ea6e7a8bb5358093a30964d0f51d46c246 Mon Sep 17 00:00:00 2001 From: Robert Date: Mon, 13 May 2024 23:03:24 -0700 Subject: [PATCH] https://www.youtube.com/watch?v=YbI9-41TxQ4 --- HF/app.py | 235 +++++++++++++++++++++++++++++++++------------------ summarize.py | 6 +- 2 files changed, 158 insertions(+), 83 deletions(-) diff --git a/HF/app.py b/HF/app.py index a9d230e..60265e8 100644 --- a/HF/app.py +++ b/HF/app.py @@ -17,6 +17,8 @@ import gradio as gr import torch import yt_dlp +log_level = "INFO" +logging.basicConfig(level=getattr(logging, log_level), format='%(asctime)s - %(levelname)s - %(message)s') os.environ["GRADIO_ANALYTICS_ENABLED"] = "False" ####### # Function Sections @@ -123,6 +125,9 @@ processing_choice = config.get('Processing', 'processing_choice', fallback='cpu' # Log file # logging.basicConfig(filename='debug-runtime.log', encoding='utf-8', level=logging.DEBUG) +# API Key Shenanigans +api_key = "UNSET" + # # ####################### @@ -318,7 +323,12 @@ def process_path(path): """ Decides whether the path is a URL or a local file and processes accordingly. """ if path.startswith('http'): logging.debug("file is a URL") - return get_youtube(path) # For YouTube URLs, modify to download and extract info + info_dict = get_youtube(path) + if info_dict: + return info_dict + else: + logging.error("Failed to get Video info") + return None elif os.path.exists(path): logging.debug("File is a path") return process_local_file(path) # For local files, define a function to handle them @@ -327,7 +337,7 @@ def process_path(path): return None -# FIXME +# def process_local_file(file_path): logging.info(f"Processing local file: {file_path}") title = normalize_title(os.path.splitext(os.path.basename(file_path))[0]) @@ -350,7 +360,7 @@ def process_local_file(file_path): # def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter, - download_video, download_audio): + download_video, download_audio, chunk_size): video_file_path = None try: results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers, @@ -359,21 +369,30 @@ def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_nam if results: transcription_result = results[0] json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json') + prettified_json_file_path = transcription_result['audio_file'].replace('.wav', '.segments_pretty.json') summary_file_path = json_file_path.replace('.segments.json', '_summary.txt') json_file_path = format_file_path(json_file_path) + prettified_json_file_path = format_file_path(prettified_json_file_path) summary_file_path = format_file_path(summary_file_path) + if download_video: + video_file_path = transcription_result['video_path'] if 'video_path' in transcription_result else None + + formatted_transcription = format_transcription(transcription_result) + + summary_text = transcription_result.get('summary', 'Summary not available') + if summary_file_path and os.path.exists(summary_file_path): - return transcription_result[ - 'transcription'], "Summary available", json_file_path, summary_file_path, video_file_path + return formatted_transcription, summary_text, prettified_json_file_path, summary_file_path, video_file_path, None else: - return transcription_result[ - 'transcription'], "Summary not available", json_file_path, None, video_file_path + return formatted_transcription, "Summary not available", prettified_json_file_path, None, video_file_path, None else: - return "No results found.", "Summary not available", None, None, None + return "No results found.", "Summary not available", None, None, None, None except Exception as e: - return str(e), "Error processing the request.", None, None, None + return str(e), "Error processing the request.", None, None, None, None + + def create_download_directory(title): @@ -409,8 +428,13 @@ def get_youtube(video_url): with yt_dlp.YoutubeDL(ydl_opts) as ydl: logging.debug("About to extract youtube info") info_dict = ydl.extract_info(video_url, download=False) - logging.debug("Youtube info successfully extracted") - return info_dict + logging.debug(f"Youtube info successfully extracted: {info_dict}") + if isinstance(info_dict, dict): + return info_dict + else: + logging.error("Invalid info_dict format") + return None + def get_playlist_videos(playlist_url): @@ -442,7 +466,7 @@ def download_video(video_url, download_path, info_dict, download_video_flag): logging.debug("About to normalize downloaded video title") title = normalize_title(info_dict['title']) - if download_video_flag == False: + if not download_video_flag: file_path = os.path.join(download_path, f"{title}.m4a") ydl_opts = { 'format': 'bestaudio[ext=m4a]', @@ -559,7 +583,6 @@ def convert_to_wav(video_file_path, offset=0): except Exception as e: logging.error("Error occurred - ffmpeg doesn't like windows") raise RuntimeError("ffmpeg failed") - exit() elif os.name == "posix": os.system(f'ffmpeg -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"') else: @@ -1229,62 +1252,114 @@ def process_text(api_key, text_file): return "Notice:", message + + + def format_file_path(file_path): # Helper function to check file existence and return an appropriate path or message return file_path if file_path and os.path.exists(file_path) else None + +def update_visibility(mode): + if mode == "Advanced": + # Show all inputs below URL + return [gr.update(visible=True)] * 9 + else: + # Hide all inputs below URL + return [gr.update(visible=False)] * 9 + +# https://www.gradio.app/guides/controlling-layout def launch_ui(demo_mode=False): - inputs = [ - gr.components.Textbox(label="URL (Mandatory)", - placeholder="Enter the video URL here"), - gr.components.Number(value=2, - label="Number of Speakers(Optional - Currently has no effect)", - visible=lambda x: x is not None), - gr.components.Dropdown(choices=whisper_models, - value="small.en", - label="Whisper Model(This is the ML model used for transcription.)", - visible=lambda x: x is not None), - gr.components.Textbox(label="Custom Prompt (Customize your summary, or ask a different question)", - placeholder="Q: As a professional summarizer, create a concise and comprehensive " - "summary of the provided text.\nA: Here is a detailed, bulleted list of the " - "key points made in the transcribed video and supporting arguments:", - lines=3), - gr.components.Number(value=0, - label="Offset (Seconds into the video to start transcribing at)", - visible=lambda x: x is not None), - gr.components.Dropdown( - choices=["huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], - label="API Name (Mandatory Unless you just want a Transcription)", visible=lambda x: x is not None), - gr.components.Textbox(label="API Key (Mandatory if API Name is specified)", - placeholder="Enter your API key here", visible=lambda x: x is not None), - gr.components.Checkbox(label="VAD Filter(Can safely ignore)", - value=False, visible=lambda x: x is not None), - gr.components.Checkbox(label="Download Video(Select to allow for file download of selected video)", - value=False, visible=lambda x: x is not None), - gr.components.Checkbox(label="Download Audio(Select to allow for file download of selected Video's Audio)", - value=False, visible=lambda x: x is not None) - ] + whisper_models = ["small.en", "medium.en", "large"] - outputs = [ - gr.components.Textbox(label="Transcription (Resulting Transcription from your input URL)"), - gr.components.Textbox(label="Summary or Status Message (Current status of Summary or Summary itself)"), - gr.components.File(label="Download Transcription as JSON (Download the Transcription as a file)", - visible=lambda x: x is not None), - gr.components.File(label="Download Summary as Text (Download the Summary as a file)", - visible=lambda x: x is not None), - gr.components.File(label="Download Video (Download the Video as a file)", visible=lambda x: x is not None), - gr.components.File(label="Download Audio (Download the Audio as a file)", visible=lambda x: x is not None) - ] + with gr.Blocks() as iface: + with gr.Tab("Audio Transcription + Summarization"): + with gr.Row(): + # Light/Dark mode toggle switch + theme_toggle = gr.Radio(choices=["Light", "Dark"], value="Light", + label="Light/Dark Mode Toggle (Toggle to change UI color scheme)") - iface = gr.Interface( - fn=process_url, - inputs=inputs, - outputs=outputs, - title="Video Transcription and Summarization", - description="Submit a video URL for transcription and summarization. Ensure you input all necessary information including API keys." - ) + # UI Mode toggle switch + ui_mode_toggle = gr.Radio(choices=["Simple", "Advanced"], value="Simple", + label="UI Mode (Toggle to show all options)") - iface.launch(share=False) + # URL input is always visible + url_input = gr.Textbox(label="URL (Mandatory)", placeholder="Enter the video URL here") + + # Inputs to be shown or hidden + num_speakers_input = gr.Number(value=2, label="Number of Speakers(Optional - Currently has no effect)", + visible=False) + whisper_model_input = gr.Dropdown(choices=whisper_models, value="small.en", + label="Whisper Model(This is the ML model used for transcription.)", + visible=False) + custom_prompt_input = gr.Textbox( + label="Custom Prompt (Customize your summary, or ask a different question)", + placeholder="Q: As a professional summarizer, create a concise and comprehensive summary of the provided text.\nA: Here is a detailed, bulleted list of the key points made in the transcribed video and supporting arguments:", + lines=3, visible=True) + offset_input = gr.Number(value=0, label="Offset (Seconds into the video to start transcribing at)", + visible=False) + api_name_input = gr.Dropdown( + choices=[None,"huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], value=None, + label="API Name (Mandatory Unless you just want a Transcription)", visible=True) + api_key_input = gr.Textbox(label="API Key (Mandatory if API Name is specified)", + placeholder="Enter your API key here", visible=True) + vad_filter_input = gr.Checkbox(label="VAD Filter(Can safely ignore)", value=False, visible=False) + download_video_input = gr.Checkbox( + label="Download Video(Select to allow for file download of selected video)", value=False, visible=False) + download_audio_input = gr.Checkbox( + label="Download Audio(Select to allow for file download of selected Video's Audio)", value=False, + visible=True) + detail_level_input = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.1, interactive=False, + label="Detail Level (Slide me)", visible=True) + + inputs = [num_speakers_input, whisper_model_input, custom_prompt_input, offset_input, api_name_input, + api_key_input, vad_filter_input, download_video_input, download_audio_input, detail_level_input] + + # Function to toggle Light/Dark Mode + def toggle_light(mode): + dark = (mode == "Dark") + return {"__theme": "dark" if dark else "light"} + + # Set the event listener for the Light/Dark mode toggle switch + theme_toggle.change(fn=toggle_light, inputs=theme_toggle, outputs=None) + + # Function to toggle visibility of advanced inputs + def toggle_ui(mode): + visible = (mode == "Advanced") + return [visible] * len(inputs) + + # Set the event listener for the UI Mode toggle switch + ui_mode_toggle.change(fn=toggle_ui, inputs=ui_mode_toggle, outputs=inputs) + + # Combine URL input and inputs + all_inputs = [url_input] + inputs + + outputs = [ + gr.Textbox(label="Transcription (Resulting Transcription from your input URL)"), + gr.Textbox(label="Summary or Status Message (Current status of Summary or Summary itself)"), + gr.File(label="Download Transcription as JSON (Download the Transcription as a file)"), + gr.File(label="Download Summary as Text (Download the Summary as a file)"), + gr.File(label="Download Video (Download the Video as a file)"), + gr.File(label="Download Audio (Download the Audio as a file)") + ] + + gr.Interface( + fn=process_url, + inputs=all_inputs, + outputs=outputs, + title="Video Transcription and Summarization", + description="Submit a video URL for transcription and summarization. Ensure you input all necessary information including API keys." + ) + + with gr.Tab("Transcription & Summarization History"): + image_input = gr.Image(label="Upload Image") + image_output = gr.Image(label="Processed Image") + + with gr.Accordion("Open for More!", open=False): + gr.Markdown("Look at me...") + gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.1, interactive=True, label="Slide me") + + iface.launch(share=False) # # @@ -1360,62 +1435,62 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model= logging.debug(f"MAIN: Summarization being performed by {api_name}") json_file_path = audio_file.replace('.wav', '.segments.json') if api_name.lower() == 'openai': - api_key = openai_api_key + openai_api_key = api_key if api_key else config.get('API', 'openai_api_key', fallback=None) try: logging.debug(f"MAIN: trying to summarize with openAI") - summary = summarize_with_openai(api_key, json_file_path, openai_model, custom_prompt) + summary = summarize_with_openai(openai_api_key, json_file_path, openai_model, custom_prompt) except requests.exceptions.ConnectionError: requests.status_code = "Connection: " elif api_name.lower() == "anthropic": - api_key = anthropic_api_key + anthropic_api_key = api_key if api_key else config.get('API', 'anthropic_api_key', fallback=None) try: logging.debug(f"MAIN: Trying to summarize with anthropic") - summary = summarize_with_claude(api_key, json_file_path, anthropic_model, custom_prompt) + summary = summarize_with_claude(anthropic_api_key, json_file_path, anthropic_model, custom_prompt) except requests.exceptions.ConnectionError: requests.status_code = "Connection: " elif api_name.lower() == "cohere": - api_key = cohere_api_key + cohere_api_key = api_key if api_key else config.get('API', 'cohere_api_key', fallback=None) try: logging.debug(f"MAIN: Trying to summarize with cohere") - summary = summarize_with_cohere(api_key, json_file_path, cohere_model, custom_prompt) + summary = summarize_with_cohere(cohere_api_key, json_file_path, cohere_model, custom_prompt) except requests.exceptions.ConnectionError: requests.status_code = "Connection: " elif api_name.lower() == "groq": - api_key = groq_api_key + groq_api_key = api_key if api_key else config.get('API', 'groq_api_key', fallback=None) try: logging.debug(f"MAIN: Trying to summarize with Groq") - summary = summarize_with_groq(api_key, json_file_path, groq_model, custom_prompt) + summary = summarize_with_groq(groq_api_key, json_file_path, groq_model, custom_prompt) except requests.exceptions.ConnectionError: requests.status_code = "Connection: " elif api_name.lower() == "llama": - token = llama_api_key + llama_token = api_key if api_key else config.get('API', 'llama_api_key', fallback=None) llama_ip = llama_api_IP try: logging.debug(f"MAIN: Trying to summarize with Llama.cpp") - summary = summarize_with_llama(llama_ip, json_file_path, token, custom_prompt) + summary = summarize_with_llama(llama_ip, json_file_path, llama_token, custom_prompt) except requests.exceptions.ConnectionError: requests.status_code = "Connection: " elif api_name.lower() == "kobold": - token = kobold_api_key + kobold_token = api_key if api_key else config.get('API', 'kobold_api_key', fallback=None) kobold_ip = kobold_api_IP try: logging.debug(f"MAIN: Trying to summarize with kobold.cpp") - summary = summarize_with_kobold(kobold_ip, json_file_path, custom_prompt) + summary = summarize_with_kobold(kobold_ip, json_file_path, kobold_token, custom_prompt) except requests.exceptions.ConnectionError: requests.status_code = "Connection: " elif api_name.lower() == "ooba": - token = ooba_api_key + ooba_token = api_key if api_key else config.get('API', 'ooba_api_key', fallback=None) ooba_ip = ooba_api_IP try: logging.debug(f"MAIN: Trying to summarize with oobabooga") - summary = summarize_with_oobabooga(ooba_ip, json_file_path, custom_prompt) + summary = summarize_with_oobabooga(ooba_ip, json_file_path, ooba_token, custom_prompt) except requests.exceptions.ConnectionError: requests.status_code = "Connection: " elif api_name.lower() == "huggingface": - api_key = huggingface_api_key + huggingface_api_key = api_key if api_key else config.get('API', 'huggingface_api_key', fallback=None) try: logging.debug(f"MAIN: Trying to summarize with huggingface") - summarize_with_huggingface(api_key, json_file_path, custom_prompt) + summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt) except requests.exceptions.ConnectionError: requests.status_code = "Connection: " @@ -1434,10 +1509,10 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model= except Exception as e: logging.error(f"Error processing path: {path}") logging.error(str(e)) - end_time = time.monotonic() - # print("Total program execution time: " + timedelta(seconds=end_time - start_time)) + # end_time = time.monotonic() + # print("Total program execution time: " + timedelta(seconds=end_time - start_time)) - return results + return results if __name__ == "__main__": diff --git a/summarize.py b/summarize.py index 08cae86..49fa1eb 100644 --- a/summarize.py +++ b/summarize.py @@ -1253,15 +1253,15 @@ def launch_ui(demo_mode=False): offset_input = gr.Number(value=0, label="Offset (Seconds into the video to start transcribing at)", visible=True) api_name_input = gr.Dropdown( - choices=["huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], value=None, + choices=[None,"huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], value=None, label="API Name (Mandatory Unless you just want a Transcription)", visible=True) api_key_input = gr.Textbox(label="API Key (Mandatory if API Name is specified)", placeholder="Enter your API key here", visible=True) vad_filter_input = gr.Checkbox(label="VAD Filter(Can safely ignore)", value=False, visible=True) download_video_input = gr.Checkbox( - label="Download Video(Select to allow for file download of selected video)", value=True, visible=True) + label="Download Video(Select to allow for file download of selected video)", value=False, visible=True) download_audio_input = gr.Checkbox( - label="Download Audio(Select to allow for file download of selected Video's Audio)", value=False, + label="Download Audio(Select to allow for file download of selected Video's Audio)", value=True, visible=True) detail_level_input = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.1, interactive=True, label="Detail Level (Slide me)", visible=True)