mirror of
https://github.com/jlengrand/tldw.git
synced 2026-03-10 08:51:17 +00:00
This commit is contained in:
235
HF/app.py
235
HF/app.py
@@ -17,6 +17,8 @@ import gradio as gr
|
||||
import torch
|
||||
import yt_dlp
|
||||
|
||||
log_level = "INFO"
|
||||
logging.basicConfig(level=getattr(logging, log_level), format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
|
||||
#######
|
||||
# Function Sections
|
||||
@@ -123,6 +125,9 @@ processing_choice = config.get('Processing', 'processing_choice', fallback='cpu'
|
||||
# Log file
|
||||
# logging.basicConfig(filename='debug-runtime.log', encoding='utf-8', level=logging.DEBUG)
|
||||
|
||||
# API Key Shenanigans
|
||||
api_key = "UNSET"
|
||||
|
||||
#
|
||||
#
|
||||
#######################
|
||||
@@ -318,7 +323,12 @@ def process_path(path):
|
||||
""" Decides whether the path is a URL or a local file and processes accordingly. """
|
||||
if path.startswith('http'):
|
||||
logging.debug("file is a URL")
|
||||
return get_youtube(path) # For YouTube URLs, modify to download and extract info
|
||||
info_dict = get_youtube(path)
|
||||
if info_dict:
|
||||
return info_dict
|
||||
else:
|
||||
logging.error("Failed to get Video info")
|
||||
return None
|
||||
elif os.path.exists(path):
|
||||
logging.debug("File is a path")
|
||||
return process_local_file(path) # For local files, define a function to handle them
|
||||
@@ -327,7 +337,7 @@ def process_path(path):
|
||||
return None
|
||||
|
||||
|
||||
# FIXME
|
||||
#
|
||||
def process_local_file(file_path):
|
||||
logging.info(f"Processing local file: {file_path}")
|
||||
title = normalize_title(os.path.splitext(os.path.basename(file_path))[0])
|
||||
@@ -350,7 +360,7 @@ def process_local_file(file_path):
|
||||
#
|
||||
|
||||
def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter,
|
||||
download_video, download_audio):
|
||||
download_video, download_audio, chunk_size):
|
||||
video_file_path = None
|
||||
try:
|
||||
results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers,
|
||||
@@ -359,21 +369,30 @@ def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_nam
|
||||
if results:
|
||||
transcription_result = results[0]
|
||||
json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json')
|
||||
prettified_json_file_path = transcription_result['audio_file'].replace('.wav', '.segments_pretty.json')
|
||||
summary_file_path = json_file_path.replace('.segments.json', '_summary.txt')
|
||||
|
||||
json_file_path = format_file_path(json_file_path)
|
||||
prettified_json_file_path = format_file_path(prettified_json_file_path)
|
||||
summary_file_path = format_file_path(summary_file_path)
|
||||
|
||||
if download_video:
|
||||
video_file_path = transcription_result['video_path'] if 'video_path' in transcription_result else None
|
||||
|
||||
formatted_transcription = format_transcription(transcription_result)
|
||||
|
||||
summary_text = transcription_result.get('summary', 'Summary not available')
|
||||
|
||||
if summary_file_path and os.path.exists(summary_file_path):
|
||||
return transcription_result[
|
||||
'transcription'], "Summary available", json_file_path, summary_file_path, video_file_path
|
||||
return formatted_transcription, summary_text, prettified_json_file_path, summary_file_path, video_file_path, None
|
||||
else:
|
||||
return transcription_result[
|
||||
'transcription'], "Summary not available", json_file_path, None, video_file_path
|
||||
return formatted_transcription, "Summary not available", prettified_json_file_path, None, video_file_path, None
|
||||
else:
|
||||
return "No results found.", "Summary not available", None, None, None
|
||||
return "No results found.", "Summary not available", None, None, None, None
|
||||
except Exception as e:
|
||||
return str(e), "Error processing the request.", None, None, None
|
||||
return str(e), "Error processing the request.", None, None, None, None
|
||||
|
||||
|
||||
|
||||
|
||||
def create_download_directory(title):
|
||||
@@ -409,8 +428,13 @@ def get_youtube(video_url):
|
||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||
logging.debug("About to extract youtube info")
|
||||
info_dict = ydl.extract_info(video_url, download=False)
|
||||
logging.debug("Youtube info successfully extracted")
|
||||
return info_dict
|
||||
logging.debug(f"Youtube info successfully extracted: {info_dict}")
|
||||
if isinstance(info_dict, dict):
|
||||
return info_dict
|
||||
else:
|
||||
logging.error("Invalid info_dict format")
|
||||
return None
|
||||
|
||||
|
||||
|
||||
def get_playlist_videos(playlist_url):
|
||||
@@ -442,7 +466,7 @@ def download_video(video_url, download_path, info_dict, download_video_flag):
|
||||
logging.debug("About to normalize downloaded video title")
|
||||
title = normalize_title(info_dict['title'])
|
||||
|
||||
if download_video_flag == False:
|
||||
if not download_video_flag:
|
||||
file_path = os.path.join(download_path, f"{title}.m4a")
|
||||
ydl_opts = {
|
||||
'format': 'bestaudio[ext=m4a]',
|
||||
@@ -559,7 +583,6 @@ def convert_to_wav(video_file_path, offset=0):
|
||||
except Exception as e:
|
||||
logging.error("Error occurred - ffmpeg doesn't like windows")
|
||||
raise RuntimeError("ffmpeg failed")
|
||||
exit()
|
||||
elif os.name == "posix":
|
||||
os.system(f'ffmpeg -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"')
|
||||
else:
|
||||
@@ -1229,62 +1252,114 @@ def process_text(api_key, text_file):
|
||||
return "Notice:", message
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def format_file_path(file_path):
|
||||
# Helper function to check file existence and return an appropriate path or message
|
||||
return file_path if file_path and os.path.exists(file_path) else None
|
||||
|
||||
|
||||
def update_visibility(mode):
|
||||
if mode == "Advanced":
|
||||
# Show all inputs below URL
|
||||
return [gr.update(visible=True)] * 9
|
||||
else:
|
||||
# Hide all inputs below URL
|
||||
return [gr.update(visible=False)] * 9
|
||||
|
||||
# https://www.gradio.app/guides/controlling-layout
|
||||
def launch_ui(demo_mode=False):
|
||||
inputs = [
|
||||
gr.components.Textbox(label="URL (Mandatory)",
|
||||
placeholder="Enter the video URL here"),
|
||||
gr.components.Number(value=2,
|
||||
label="Number of Speakers(Optional - Currently has no effect)",
|
||||
visible=lambda x: x is not None),
|
||||
gr.components.Dropdown(choices=whisper_models,
|
||||
value="small.en",
|
||||
label="Whisper Model(This is the ML model used for transcription.)",
|
||||
visible=lambda x: x is not None),
|
||||
gr.components.Textbox(label="Custom Prompt (Customize your summary, or ask a different question)",
|
||||
placeholder="Q: As a professional summarizer, create a concise and comprehensive "
|
||||
"summary of the provided text.\nA: Here is a detailed, bulleted list of the "
|
||||
"key points made in the transcribed video and supporting arguments:",
|
||||
lines=3),
|
||||
gr.components.Number(value=0,
|
||||
label="Offset (Seconds into the video to start transcribing at)",
|
||||
visible=lambda x: x is not None),
|
||||
gr.components.Dropdown(
|
||||
choices=["huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"],
|
||||
label="API Name (Mandatory Unless you just want a Transcription)", visible=lambda x: x is not None),
|
||||
gr.components.Textbox(label="API Key (Mandatory if API Name is specified)",
|
||||
placeholder="Enter your API key here", visible=lambda x: x is not None),
|
||||
gr.components.Checkbox(label="VAD Filter(Can safely ignore)",
|
||||
value=False, visible=lambda x: x is not None),
|
||||
gr.components.Checkbox(label="Download Video(Select to allow for file download of selected video)",
|
||||
value=False, visible=lambda x: x is not None),
|
||||
gr.components.Checkbox(label="Download Audio(Select to allow for file download of selected Video's Audio)",
|
||||
value=False, visible=lambda x: x is not None)
|
||||
]
|
||||
whisper_models = ["small.en", "medium.en", "large"]
|
||||
|
||||
outputs = [
|
||||
gr.components.Textbox(label="Transcription (Resulting Transcription from your input URL)"),
|
||||
gr.components.Textbox(label="Summary or Status Message (Current status of Summary or Summary itself)"),
|
||||
gr.components.File(label="Download Transcription as JSON (Download the Transcription as a file)",
|
||||
visible=lambda x: x is not None),
|
||||
gr.components.File(label="Download Summary as Text (Download the Summary as a file)",
|
||||
visible=lambda x: x is not None),
|
||||
gr.components.File(label="Download Video (Download the Video as a file)", visible=lambda x: x is not None),
|
||||
gr.components.File(label="Download Audio (Download the Audio as a file)", visible=lambda x: x is not None)
|
||||
]
|
||||
with gr.Blocks() as iface:
|
||||
with gr.Tab("Audio Transcription + Summarization"):
|
||||
with gr.Row():
|
||||
# Light/Dark mode toggle switch
|
||||
theme_toggle = gr.Radio(choices=["Light", "Dark"], value="Light",
|
||||
label="Light/Dark Mode Toggle (Toggle to change UI color scheme)")
|
||||
|
||||
iface = gr.Interface(
|
||||
fn=process_url,
|
||||
inputs=inputs,
|
||||
outputs=outputs,
|
||||
title="Video Transcription and Summarization",
|
||||
description="Submit a video URL for transcription and summarization. Ensure you input all necessary information including API keys."
|
||||
)
|
||||
# UI Mode toggle switch
|
||||
ui_mode_toggle = gr.Radio(choices=["Simple", "Advanced"], value="Simple",
|
||||
label="UI Mode (Toggle to show all options)")
|
||||
|
||||
iface.launch(share=False)
|
||||
# URL input is always visible
|
||||
url_input = gr.Textbox(label="URL (Mandatory)", placeholder="Enter the video URL here")
|
||||
|
||||
# Inputs to be shown or hidden
|
||||
num_speakers_input = gr.Number(value=2, label="Number of Speakers(Optional - Currently has no effect)",
|
||||
visible=False)
|
||||
whisper_model_input = gr.Dropdown(choices=whisper_models, value="small.en",
|
||||
label="Whisper Model(This is the ML model used for transcription.)",
|
||||
visible=False)
|
||||
custom_prompt_input = gr.Textbox(
|
||||
label="Custom Prompt (Customize your summary, or ask a different question)",
|
||||
placeholder="Q: As a professional summarizer, create a concise and comprehensive summary of the provided text.\nA: Here is a detailed, bulleted list of the key points made in the transcribed video and supporting arguments:",
|
||||
lines=3, visible=True)
|
||||
offset_input = gr.Number(value=0, label="Offset (Seconds into the video to start transcribing at)",
|
||||
visible=False)
|
||||
api_name_input = gr.Dropdown(
|
||||
choices=[None,"huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], value=None,
|
||||
label="API Name (Mandatory Unless you just want a Transcription)", visible=True)
|
||||
api_key_input = gr.Textbox(label="API Key (Mandatory if API Name is specified)",
|
||||
placeholder="Enter your API key here", visible=True)
|
||||
vad_filter_input = gr.Checkbox(label="VAD Filter(Can safely ignore)", value=False, visible=False)
|
||||
download_video_input = gr.Checkbox(
|
||||
label="Download Video(Select to allow for file download of selected video)", value=False, visible=False)
|
||||
download_audio_input = gr.Checkbox(
|
||||
label="Download Audio(Select to allow for file download of selected Video's Audio)", value=False,
|
||||
visible=True)
|
||||
detail_level_input = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.1, interactive=False,
|
||||
label="Detail Level (Slide me)", visible=True)
|
||||
|
||||
inputs = [num_speakers_input, whisper_model_input, custom_prompt_input, offset_input, api_name_input,
|
||||
api_key_input, vad_filter_input, download_video_input, download_audio_input, detail_level_input]
|
||||
|
||||
# Function to toggle Light/Dark Mode
|
||||
def toggle_light(mode):
|
||||
dark = (mode == "Dark")
|
||||
return {"__theme": "dark" if dark else "light"}
|
||||
|
||||
# Set the event listener for the Light/Dark mode toggle switch
|
||||
theme_toggle.change(fn=toggle_light, inputs=theme_toggle, outputs=None)
|
||||
|
||||
# Function to toggle visibility of advanced inputs
|
||||
def toggle_ui(mode):
|
||||
visible = (mode == "Advanced")
|
||||
return [visible] * len(inputs)
|
||||
|
||||
# Set the event listener for the UI Mode toggle switch
|
||||
ui_mode_toggle.change(fn=toggle_ui, inputs=ui_mode_toggle, outputs=inputs)
|
||||
|
||||
# Combine URL input and inputs
|
||||
all_inputs = [url_input] + inputs
|
||||
|
||||
outputs = [
|
||||
gr.Textbox(label="Transcription (Resulting Transcription from your input URL)"),
|
||||
gr.Textbox(label="Summary or Status Message (Current status of Summary or Summary itself)"),
|
||||
gr.File(label="Download Transcription as JSON (Download the Transcription as a file)"),
|
||||
gr.File(label="Download Summary as Text (Download the Summary as a file)"),
|
||||
gr.File(label="Download Video (Download the Video as a file)"),
|
||||
gr.File(label="Download Audio (Download the Audio as a file)")
|
||||
]
|
||||
|
||||
gr.Interface(
|
||||
fn=process_url,
|
||||
inputs=all_inputs,
|
||||
outputs=outputs,
|
||||
title="Video Transcription and Summarization",
|
||||
description="Submit a video URL for transcription and summarization. Ensure you input all necessary information including API keys."
|
||||
)
|
||||
|
||||
with gr.Tab("Transcription & Summarization History"):
|
||||
image_input = gr.Image(label="Upload Image")
|
||||
image_output = gr.Image(label="Processed Image")
|
||||
|
||||
with gr.Accordion("Open for More!", open=False):
|
||||
gr.Markdown("Look at me...")
|
||||
gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.1, interactive=True, label="Slide me")
|
||||
|
||||
iface.launch(share=False)
|
||||
|
||||
#
|
||||
#
|
||||
@@ -1360,62 +1435,62 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
|
||||
logging.debug(f"MAIN: Summarization being performed by {api_name}")
|
||||
json_file_path = audio_file.replace('.wav', '.segments.json')
|
||||
if api_name.lower() == 'openai':
|
||||
api_key = openai_api_key
|
||||
openai_api_key = api_key if api_key else config.get('API', 'openai_api_key', fallback=None)
|
||||
try:
|
||||
logging.debug(f"MAIN: trying to summarize with openAI")
|
||||
summary = summarize_with_openai(api_key, json_file_path, openai_model, custom_prompt)
|
||||
summary = summarize_with_openai(openai_api_key, json_file_path, openai_model, custom_prompt)
|
||||
except requests.exceptions.ConnectionError:
|
||||
requests.status_code = "Connection: "
|
||||
elif api_name.lower() == "anthropic":
|
||||
api_key = anthropic_api_key
|
||||
anthropic_api_key = api_key if api_key else config.get('API', 'anthropic_api_key', fallback=None)
|
||||
try:
|
||||
logging.debug(f"MAIN: Trying to summarize with anthropic")
|
||||
summary = summarize_with_claude(api_key, json_file_path, anthropic_model, custom_prompt)
|
||||
summary = summarize_with_claude(anthropic_api_key, json_file_path, anthropic_model, custom_prompt)
|
||||
except requests.exceptions.ConnectionError:
|
||||
requests.status_code = "Connection: "
|
||||
elif api_name.lower() == "cohere":
|
||||
api_key = cohere_api_key
|
||||
cohere_api_key = api_key if api_key else config.get('API', 'cohere_api_key', fallback=None)
|
||||
try:
|
||||
logging.debug(f"MAIN: Trying to summarize with cohere")
|
||||
summary = summarize_with_cohere(api_key, json_file_path, cohere_model, custom_prompt)
|
||||
summary = summarize_with_cohere(cohere_api_key, json_file_path, cohere_model, custom_prompt)
|
||||
except requests.exceptions.ConnectionError:
|
||||
requests.status_code = "Connection: "
|
||||
elif api_name.lower() == "groq":
|
||||
api_key = groq_api_key
|
||||
groq_api_key = api_key if api_key else config.get('API', 'groq_api_key', fallback=None)
|
||||
try:
|
||||
logging.debug(f"MAIN: Trying to summarize with Groq")
|
||||
summary = summarize_with_groq(api_key, json_file_path, groq_model, custom_prompt)
|
||||
summary = summarize_with_groq(groq_api_key, json_file_path, groq_model, custom_prompt)
|
||||
except requests.exceptions.ConnectionError:
|
||||
requests.status_code = "Connection: "
|
||||
elif api_name.lower() == "llama":
|
||||
token = llama_api_key
|
||||
llama_token = api_key if api_key else config.get('API', 'llama_api_key', fallback=None)
|
||||
llama_ip = llama_api_IP
|
||||
try:
|
||||
logging.debug(f"MAIN: Trying to summarize with Llama.cpp")
|
||||
summary = summarize_with_llama(llama_ip, json_file_path, token, custom_prompt)
|
||||
summary = summarize_with_llama(llama_ip, json_file_path, llama_token, custom_prompt)
|
||||
except requests.exceptions.ConnectionError:
|
||||
requests.status_code = "Connection: "
|
||||
elif api_name.lower() == "kobold":
|
||||
token = kobold_api_key
|
||||
kobold_token = api_key if api_key else config.get('API', 'kobold_api_key', fallback=None)
|
||||
kobold_ip = kobold_api_IP
|
||||
try:
|
||||
logging.debug(f"MAIN: Trying to summarize with kobold.cpp")
|
||||
summary = summarize_with_kobold(kobold_ip, json_file_path, custom_prompt)
|
||||
summary = summarize_with_kobold(kobold_ip, json_file_path, kobold_token, custom_prompt)
|
||||
except requests.exceptions.ConnectionError:
|
||||
requests.status_code = "Connection: "
|
||||
elif api_name.lower() == "ooba":
|
||||
token = ooba_api_key
|
||||
ooba_token = api_key if api_key else config.get('API', 'ooba_api_key', fallback=None)
|
||||
ooba_ip = ooba_api_IP
|
||||
try:
|
||||
logging.debug(f"MAIN: Trying to summarize with oobabooga")
|
||||
summary = summarize_with_oobabooga(ooba_ip, json_file_path, custom_prompt)
|
||||
summary = summarize_with_oobabooga(ooba_ip, json_file_path, ooba_token, custom_prompt)
|
||||
except requests.exceptions.ConnectionError:
|
||||
requests.status_code = "Connection: "
|
||||
elif api_name.lower() == "huggingface":
|
||||
api_key = huggingface_api_key
|
||||
huggingface_api_key = api_key if api_key else config.get('API', 'huggingface_api_key', fallback=None)
|
||||
try:
|
||||
logging.debug(f"MAIN: Trying to summarize with huggingface")
|
||||
summarize_with_huggingface(api_key, json_file_path, custom_prompt)
|
||||
summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt)
|
||||
except requests.exceptions.ConnectionError:
|
||||
requests.status_code = "Connection: "
|
||||
|
||||
@@ -1434,10 +1509,10 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
|
||||
except Exception as e:
|
||||
logging.error(f"Error processing path: {path}")
|
||||
logging.error(str(e))
|
||||
end_time = time.monotonic()
|
||||
# print("Total program execution time: " + timedelta(seconds=end_time - start_time))
|
||||
# end_time = time.monotonic()
|
||||
# print("Total program execution time: " + timedelta(seconds=end_time - start_time))
|
||||
|
||||
return results
|
||||
return results
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1253,15 +1253,15 @@ def launch_ui(demo_mode=False):
|
||||
offset_input = gr.Number(value=0, label="Offset (Seconds into the video to start transcribing at)",
|
||||
visible=True)
|
||||
api_name_input = gr.Dropdown(
|
||||
choices=["huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], value=None,
|
||||
choices=[None,"huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], value=None,
|
||||
label="API Name (Mandatory Unless you just want a Transcription)", visible=True)
|
||||
api_key_input = gr.Textbox(label="API Key (Mandatory if API Name is specified)",
|
||||
placeholder="Enter your API key here", visible=True)
|
||||
vad_filter_input = gr.Checkbox(label="VAD Filter(Can safely ignore)", value=False, visible=True)
|
||||
download_video_input = gr.Checkbox(
|
||||
label="Download Video(Select to allow for file download of selected video)", value=True, visible=True)
|
||||
label="Download Video(Select to allow for file download of selected video)", value=False, visible=True)
|
||||
download_audio_input = gr.Checkbox(
|
||||
label="Download Audio(Select to allow for file download of selected Video's Audio)", value=False,
|
||||
label="Download Audio(Select to allow for file download of selected Video's Audio)", value=True,
|
||||
visible=True)
|
||||
detail_level_input = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.1, interactive=True,
|
||||
label="Detail Level (Slide me)", visible=True)
|
||||
|
||||
Reference in New Issue
Block a user