Robert
2024-05-13 23:03:24 -07:00
parent 3e12a4958a
commit b93248ea6e
2 changed files with 158 additions and 83 deletions

HF/app.py

@@ -17,6 +17,8 @@ import gradio as gr
import torch
import yt_dlp
+log_level = "INFO"
+logging.basicConfig(level=getattr(logging, log_level), format='%(asctime)s - %(levelname)s - %(message)s')
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
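
Note: the two added lines resolve a textual level name into a logging constant via getattr. A standalone sketch of the same pattern with a guard for invalid names (the LOG_LEVEL environment variable here is illustrative, not part of this app):

    import logging
    import os

    log_level = os.environ.get("LOG_LEVEL", "INFO").upper()
    # getattr() maps "INFO"/"DEBUG"/... to the logging constant; the third
    # argument is a fallback in case the name is misspelled.
    level = getattr(logging, log_level, logging.INFO)
    logging.basicConfig(level=level, format='%(asctime)s - %(levelname)s - %(message)s')
    logging.info("Logging initialized at %s", log_level)
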
#######
# Function Sections
@@ -123,6 +125,9 @@ processing_choice = config.get('Processing', 'processing_choice', fallback='cpu'
# Log file
# logging.basicConfig(filename='debug-runtime.log', encoding='utf-8', level=logging.DEBUG)
# API Key Shenanigans
+api_key = "UNSET"
#
#
#######################
@@ -318,7 +323,12 @@ def process_path(path):
""" Decides whether the path is a URL or a local file and processes accordingly. """
if path.startswith('http'):
logging.debug("file is a URL")
return get_youtube(path) # For YouTube URLs, modify to download and extract info
info_dict = get_youtube(path)
if info_dict:
return info_dict
else:
logging.error("Failed to get Video info")
return None
elif os.path.exists(path):
logging.debug("File is a path")
return process_local_file(path) # For local files, define a function to handle them
@@ -327,7 +337,7 @@ def process_path(path):
        return None
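
Note: process_path now propagates None instead of returning an unchecked get_youtube result. A minimal, self-contained sketch of the dispatch pattern (fetch_info and handle_local_file are hypothetical stand-ins for get_youtube and process_local_file):

    import logging
    import os

    def process_path_sketch(path, fetch_info, handle_local_file):
        """Route a URL to the downloader and an existing path to the local handler."""
        if path.startswith('http'):
            info_dict = fetch_info(path)
            if not info_dict:
                logging.error("Failed to get video info for %s", path)
                return None
            return info_dict
        if os.path.exists(path):
            return handle_local_file(path)
        logging.error("Neither a URL nor an existing file: %s", path)
        return None
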
# FIXME
#
def process_local_file(file_path):
    logging.info(f"Processing local file: {file_path}")
    title = normalize_title(os.path.splitext(os.path.basename(file_path))[0])
@@ -350,7 +360,7 @@ def process_local_file(file_path):
#
def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter,
-                download_video, download_audio):
+                download_video, download_audio, chunk_size):
    video_file_path = None
    try:
        results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers,
@@ -359,21 +369,30 @@ def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_nam
        if results:
            transcription_result = results[0]
            json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json')
+            prettified_json_file_path = transcription_result['audio_file'].replace('.wav', '.segments_pretty.json')
            summary_file_path = json_file_path.replace('.segments.json', '_summary.txt')
+            json_file_path = format_file_path(json_file_path)
+            prettified_json_file_path = format_file_path(prettified_json_file_path)
+            summary_file_path = format_file_path(summary_file_path)
            if download_video:
                video_file_path = transcription_result['video_path'] if 'video_path' in transcription_result else None
+            formatted_transcription = format_transcription(transcription_result)
+            summary_text = transcription_result.get('summary', 'Summary not available')
            if summary_file_path and os.path.exists(summary_file_path):
-                return transcription_result[
-                    'transcription'], "Summary available", json_file_path, summary_file_path, video_file_path
+                return formatted_transcription, summary_text, prettified_json_file_path, summary_file_path, video_file_path, None
            else:
-                return transcription_result[
-                    'transcription'], "Summary not available", json_file_path, None, video_file_path
+                return formatted_transcription, "Summary not available", prettified_json_file_path, None, video_file_path, None
        else:
-            return "No results found.", "Summary not available", None, None, None
+            return "No results found.", "Summary not available", None, None, None, None
    except Exception as e:
-        return str(e), "Error processing the request.", None, None, None
+        return str(e), "Error processing the request.", None, None, None, None
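
Note: every return path above now yields six values because the Gradio interface declares six output components; a tuple of the wrong length fails at event time. A sketch of that invariant under assumed names (run_pipeline is hypothetical):

    def process_url_sketch(url, run_pipeline):
        # Outputs, in order: transcription, summary/status, JSON file,
        # summary file, video file, audio file.
        try:
            results = run_pipeline(url)
            if not results:
                return "No results found.", "Summary not available", None, None, None, None
            r = results[0]
            return r['transcription'], r.get('summary', 'Summary not available'), None, None, None, None
        except Exception as e:
            return str(e), "Error processing the request.", None, None, None, None
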
def create_download_directory(title):
@@ -409,8 +428,13 @@ def get_youtube(video_url):
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        logging.debug("About to extract youtube info")
        info_dict = ydl.extract_info(video_url, download=False)
-        logging.debug("Youtube info successfully extracted")
-        return info_dict
+        logging.debug(f"Youtube info successfully extracted: {info_dict}")
+        if isinstance(info_dict, dict):
+            return info_dict
+        else:
+            logging.error("Invalid info_dict format")
+            return None
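
Note: a standalone sketch of the validated metadata fetch, using only documented yt_dlp options:

    import logging
    import yt_dlp

    def fetch_video_info(video_url):
        ydl_opts = {'quiet': True, 'skip_download': True}
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # download=False returns metadata only (title, formats, ...).
            info_dict = ydl.extract_info(video_url, download=False)
        if isinstance(info_dict, dict):
            return info_dict
        logging.error("Invalid info_dict format: %r", type(info_dict))
        return None
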
def get_playlist_videos(playlist_url):
@@ -442,7 +466,7 @@ def download_video(video_url, download_path, info_dict, download_video_flag):
logging.debug("About to normalize downloaded video title")
title = normalize_title(info_dict['title'])
if download_video_flag == False:
if not download_video_flag:
file_path = os.path.join(download_path, f"{title}.m4a")
ydl_opts = {
'format': 'bestaudio[ext=m4a]',
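
Note: per PEP 8, boolean flags are tested directly rather than compared to False:

    download_video_flag = False
    if not download_video_flag:   # idiomatic
        print("audio-only download")
    # rather than: if download_video_flag == False:
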
@@ -559,7 +583,6 @@ def convert_to_wav(video_file_path, offset=0):
        except Exception as e:
            logging.error("Error occurred - ffmpeg doesn't like windows")
            raise RuntimeError("ffmpeg failed")
-            exit()
    elif os.name == "posix":
        os.system(f'ffmpeg -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"')
    else:
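
Note: the exit() after raise was unreachable and is dropped. As a hedged aside (not what this commit does), the os.system calls could be expressed with subprocess.run, which sidesteps shell quoting of file paths and surfaces ffmpeg failures as exceptions:

    import subprocess

    def convert_to_wav_sketch(video_file_path, out_path):
        cmd = [
            'ffmpeg', '-ss', '00:00:00', '-i', video_file_path,
            '-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le', out_path,
        ]
        # check=True raises CalledProcessError on a nonzero ffmpeg exit code.
        subprocess.run(cmd, check=True)
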
@@ -1229,62 +1252,114 @@ def process_text(api_key, text_file):
return "Notice:", message
def format_file_path(file_path):
# Helper function to check file existence and return an appropriate path or message
return file_path if file_path and os.path.exists(file_path) else None
def update_visibility(mode):
if mode == "Advanced":
# Show all inputs below URL
return [gr.update(visible=True)] * 9
else:
# Hide all inputs below URL
return [gr.update(visible=False)] * 9
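
Note: update_visibility returns one gr.update(visible=...) per component, which Gradio applies positionally to the event's outputs. A self-contained sketch of the same show/hide wiring:

    import gradio as gr

    with gr.Blocks() as demo:
        mode = gr.Radio(choices=["Simple", "Advanced"], value="Simple", label="UI Mode")
        extras = [gr.Textbox(visible=False, label=f"Advanced option {i}") for i in range(3)]

        def set_visibility(m):
            # One update per output component, in order.
            return [gr.update(visible=(m == "Advanced"))] * len(extras)

        mode.change(fn=set_visibility, inputs=mode, outputs=extras)

    if __name__ == "__main__":
        demo.launch()
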
+# https://www.gradio.app/guides/controlling-layout
def launch_ui(demo_mode=False):
-    inputs = [
-        gr.components.Textbox(label="URL (Mandatory)",
-                              placeholder="Enter the video URL here"),
-        gr.components.Number(value=2,
-                             label="Number of Speakers(Optional - Currently has no effect)",
-                             visible=lambda x: x is not None),
-        gr.components.Dropdown(choices=whisper_models,
-                               value="small.en",
-                               label="Whisper Model(This is the ML model used for transcription.)",
-                               visible=lambda x: x is not None),
-        gr.components.Textbox(label="Custom Prompt (Customize your summary, or ask a different question)",
-                              placeholder="Q: As a professional summarizer, create a concise and comprehensive "
-                                          "summary of the provided text.\nA: Here is a detailed, bulleted list of the "
-                                          "key points made in the transcribed video and supporting arguments:",
-                              lines=3),
-        gr.components.Number(value=0,
-                             label="Offset (Seconds into the video to start transcribing at)",
-                             visible=lambda x: x is not None),
-        gr.components.Dropdown(
-            choices=["huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"],
-            label="API Name (Mandatory Unless you just want a Transcription)", visible=lambda x: x is not None),
-        gr.components.Textbox(label="API Key (Mandatory if API Name is specified)",
-                              placeholder="Enter your API key here", visible=lambda x: x is not None),
-        gr.components.Checkbox(label="VAD Filter(Can safely ignore)",
-                               value=False, visible=lambda x: x is not None),
-        gr.components.Checkbox(label="Download Video(Select to allow for file download of selected video)",
-                               value=False, visible=lambda x: x is not None),
-        gr.components.Checkbox(label="Download Audio(Select to allow for file download of selected Video's Audio)",
-                               value=False, visible=lambda x: x is not None)
-    ]
    whisper_models = ["small.en", "medium.en", "large"]
-    outputs = [
-        gr.components.Textbox(label="Transcription (Resulting Transcription from your input URL)"),
-        gr.components.Textbox(label="Summary or Status Message (Current status of Summary or Summary itself)"),
-        gr.components.File(label="Download Transcription as JSON (Download the Transcription as a file)",
-                           visible=lambda x: x is not None),
-        gr.components.File(label="Download Summary as Text (Download the Summary as a file)",
-                           visible=lambda x: x is not None),
-        gr.components.File(label="Download Video (Download the Video as a file)", visible=lambda x: x is not None),
-        gr.components.File(label="Download Audio (Download the Audio as a file)", visible=lambda x: x is not None)
-    ]
+    with gr.Blocks() as iface:
+        with gr.Tab("Audio Transcription + Summarization"):
+            with gr.Row():
+                # Light/Dark mode toggle switch
+                theme_toggle = gr.Radio(choices=["Light", "Dark"], value="Light",
+                                        label="Light/Dark Mode Toggle (Toggle to change UI color scheme)")
-    iface = gr.Interface(
-        fn=process_url,
-        inputs=inputs,
-        outputs=outputs,
-        title="Video Transcription and Summarization",
-        description="Submit a video URL for transcription and summarization. Ensure you input all necessary information including API keys."
-    )
+                # UI Mode toggle switch
+                ui_mode_toggle = gr.Radio(choices=["Simple", "Advanced"], value="Simple",
+                                          label="UI Mode (Toggle to show all options)")
-    iface.launch(share=False)
+            # URL input is always visible
+            url_input = gr.Textbox(label="URL (Mandatory)", placeholder="Enter the video URL here")
+            # Inputs to be shown or hidden
+            num_speakers_input = gr.Number(value=2, label="Number of Speakers(Optional - Currently has no effect)",
+                                           visible=False)
+            whisper_model_input = gr.Dropdown(choices=whisper_models, value="small.en",
+                                              label="Whisper Model(This is the ML model used for transcription.)",
+                                              visible=False)
+            custom_prompt_input = gr.Textbox(
+                label="Custom Prompt (Customize your summary, or ask a different question)",
+                placeholder="Q: As a professional summarizer, create a concise and comprehensive summary of the provided text.\nA: Here is a detailed, bulleted list of the key points made in the transcribed video and supporting arguments:",
+                lines=3, visible=True)
+            offset_input = gr.Number(value=0, label="Offset (Seconds into the video to start transcribing at)",
+                                     visible=False)
+            api_name_input = gr.Dropdown(
+                choices=[None, "huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], value=None,
+                label="API Name (Mandatory Unless you just want a Transcription)", visible=True)
+            api_key_input = gr.Textbox(label="API Key (Mandatory if API Name is specified)",
+                                       placeholder="Enter your API key here", visible=True)
+            vad_filter_input = gr.Checkbox(label="VAD Filter(Can safely ignore)", value=False, visible=False)
+            download_video_input = gr.Checkbox(
+                label="Download Video(Select to allow for file download of selected video)", value=False, visible=False)
+            download_audio_input = gr.Checkbox(
+                label="Download Audio(Select to allow for file download of selected Video's Audio)", value=False,
+                visible=True)
+            detail_level_input = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.1, interactive=False,
+                                           label="Detail Level (Slide me)", visible=True)
+            inputs = [num_speakers_input, whisper_model_input, custom_prompt_input, offset_input, api_name_input,
+                      api_key_input, vad_filter_input, download_video_input, download_audio_input, detail_level_input]
+            # Function to toggle Light/Dark Mode
+            def toggle_light(mode):
+                dark = (mode == "Dark")
+                return {"__theme": "dark" if dark else "light"}
+            # Set the event listener for the Light/Dark mode toggle switch
+            theme_toggle.change(fn=toggle_light, inputs=theme_toggle, outputs=None)
+            # Function to toggle visibility of advanced inputs
+            def toggle_ui(mode):
+                visible = (mode == "Advanced")
+                return [gr.update(visible=visible)] * len(inputs)
+            # Set the event listener for the UI Mode toggle switch
+            ui_mode_toggle.change(fn=toggle_ui, inputs=ui_mode_toggle, outputs=inputs)
+            # Combine URL input and inputs
+            all_inputs = [url_input] + inputs
+            outputs = [
+                gr.Textbox(label="Transcription (Resulting Transcription from your input URL)"),
+                gr.Textbox(label="Summary or Status Message (Current status of Summary or Summary itself)"),
+                gr.File(label="Download Transcription as JSON (Download the Transcription as a file)"),
+                gr.File(label="Download Summary as Text (Download the Summary as a file)"),
+                gr.File(label="Download Video (Download the Video as a file)"),
+                gr.File(label="Download Audio (Download the Audio as a file)")
+            ]
+            gr.Interface(
+                fn=process_url,
+                inputs=all_inputs,
+                outputs=outputs,
+                title="Video Transcription and Summarization",
+                description="Submit a video URL for transcription and summarization. Ensure you input all necessary information including API keys."
+            )
+        with gr.Tab("Transcription & Summarization History"):
+            image_input = gr.Image(label="Upload Image")
+            image_output = gr.Image(label="Processed Image")
+            with gr.Accordion("Open for More!", open=False):
+                gr.Markdown("Look at me...")
+                gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.1, interactive=True, label="Slide me")
+    iface.launch(share=False)
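
Note: launch_ui moves from a bare gr.Interface to a gr.Blocks layout with tabs; a minimal skeleton of that structure:

    import gradio as gr

    with gr.Blocks() as iface:
        with gr.Tab("Audio Transcription + Summarization"):
            url = gr.Textbox(label="URL (Mandatory)")
        with gr.Tab("Transcription & Summarization History"):
            gr.Markdown("History goes here.")

    if __name__ == "__main__":
        iface.launch(share=False)
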
#
#
@@ -1360,62 +1435,62 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
logging.debug(f"MAIN: Summarization being performed by {api_name}")
json_file_path = audio_file.replace('.wav', '.segments.json')
if api_name.lower() == 'openai':
api_key = openai_api_key
openai_api_key = api_key if api_key else config.get('API', 'openai_api_key', fallback=None)
try:
logging.debug(f"MAIN: trying to summarize with openAI")
summary = summarize_with_openai(api_key, json_file_path, openai_model, custom_prompt)
summary = summarize_with_openai(openai_api_key, json_file_path, openai_model, custom_prompt)
except requests.exceptions.ConnectionError:
requests.status_code = "Connection: "
elif api_name.lower() == "anthropic":
api_key = anthropic_api_key
anthropic_api_key = api_key if api_key else config.get('API', 'anthropic_api_key', fallback=None)
try:
logging.debug(f"MAIN: Trying to summarize with anthropic")
summary = summarize_with_claude(api_key, json_file_path, anthropic_model, custom_prompt)
summary = summarize_with_claude(anthropic_api_key, json_file_path, anthropic_model, custom_prompt)
except requests.exceptions.ConnectionError:
requests.status_code = "Connection: "
elif api_name.lower() == "cohere":
api_key = cohere_api_key
cohere_api_key = api_key if api_key else config.get('API', 'cohere_api_key', fallback=None)
try:
logging.debug(f"MAIN: Trying to summarize with cohere")
summary = summarize_with_cohere(api_key, json_file_path, cohere_model, custom_prompt)
summary = summarize_with_cohere(cohere_api_key, json_file_path, cohere_model, custom_prompt)
except requests.exceptions.ConnectionError:
requests.status_code = "Connection: "
elif api_name.lower() == "groq":
api_key = groq_api_key
groq_api_key = api_key if api_key else config.get('API', 'groq_api_key', fallback=None)
try:
logging.debug(f"MAIN: Trying to summarize with Groq")
summary = summarize_with_groq(api_key, json_file_path, groq_model, custom_prompt)
summary = summarize_with_groq(groq_api_key, json_file_path, groq_model, custom_prompt)
except requests.exceptions.ConnectionError:
requests.status_code = "Connection: "
elif api_name.lower() == "llama":
token = llama_api_key
llama_token = api_key if api_key else config.get('API', 'llama_api_key', fallback=None)
llama_ip = llama_api_IP
try:
logging.debug(f"MAIN: Trying to summarize with Llama.cpp")
summary = summarize_with_llama(llama_ip, json_file_path, token, custom_prompt)
summary = summarize_with_llama(llama_ip, json_file_path, llama_token, custom_prompt)
except requests.exceptions.ConnectionError:
requests.status_code = "Connection: "
elif api_name.lower() == "kobold":
token = kobold_api_key
kobold_token = api_key if api_key else config.get('API', 'kobold_api_key', fallback=None)
kobold_ip = kobold_api_IP
try:
logging.debug(f"MAIN: Trying to summarize with kobold.cpp")
summary = summarize_with_kobold(kobold_ip, json_file_path, custom_prompt)
summary = summarize_with_kobold(kobold_ip, json_file_path, kobold_token, custom_prompt)
except requests.exceptions.ConnectionError:
requests.status_code = "Connection: "
elif api_name.lower() == "ooba":
token = ooba_api_key
ooba_token = api_key if api_key else config.get('API', 'ooba_api_key', fallback=None)
ooba_ip = ooba_api_IP
try:
logging.debug(f"MAIN: Trying to summarize with oobabooga")
summary = summarize_with_oobabooga(ooba_ip, json_file_path, custom_prompt)
summary = summarize_with_oobabooga(ooba_ip, json_file_path, ooba_token, custom_prompt)
except requests.exceptions.ConnectionError:
requests.status_code = "Connection: "
elif api_name.lower() == "huggingface":
api_key = huggingface_api_key
huggingface_api_key = api_key if api_key else config.get('API', 'huggingface_api_key', fallback=None)
try:
logging.debug(f"MAIN: Trying to summarize with huggingface")
summarize_with_huggingface(api_key, json_file_path, custom_prompt)
summarize_with_huggingface(huggingface_api_key, json_file_path, custom_prompt)
except requests.exceptions.ConnectionError:
requests.status_code = "Connection: "
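
Note: each branch now prefers the key supplied in the UI and falls back to config.ini. The repeated expression could be factored into a single helper; a sketch (resolve_api_key is hypothetical, not in the commit):

    from configparser import ConfigParser

    def resolve_api_key(user_key, config, option):
        """A UI-supplied key wins; otherwise read the [API] section (may be None)."""
        return user_key if user_key else config.get('API', option, fallback=None)

    # Usage mirroring the branches above:
    # openai_api_key = resolve_api_key(api_key, config, 'openai_api_key')
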
@@ -1434,10 +1509,10 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
        except Exception as e:
            logging.error(f"Error processing path: {path}")
            logging.error(str(e))
-    end_time = time.monotonic()
-    # print("Total program execution time: " + timedelta(seconds=end_time - start_time))
+    # end_time = time.monotonic()
+    # print("Total program execution time: " + timedelta(seconds=end_time - start_time))
-        return results
+    return results
if __name__ == "__main__":


@@ -1253,15 +1253,15 @@ def launch_ui(demo_mode=False):
        offset_input = gr.Number(value=0, label="Offset (Seconds into the video to start transcribing at)",
                                 visible=True)
        api_name_input = gr.Dropdown(
-            choices=["huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], value=None,
+            choices=[None, "huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], value=None,
            label="API Name (Mandatory Unless you just want a Transcription)", visible=True)
        api_key_input = gr.Textbox(label="API Key (Mandatory if API Name is specified)",
                                   placeholder="Enter your API key here", visible=True)
        vad_filter_input = gr.Checkbox(label="VAD Filter(Can safely ignore)", value=False, visible=True)
        download_video_input = gr.Checkbox(
-            label="Download Video(Select to allow for file download of selected video)", value=True, visible=True)
+            label="Download Video(Select to allow for file download of selected video)", value=False, visible=True)
        download_audio_input = gr.Checkbox(
-            label="Download Audio(Select to allow for file download of selected Video's Audio)", value=False,
+            label="Download Audio(Select to allow for file download of selected Video's Audio)", value=True,
            visible=True)
        detail_level_input = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.1, interactive=True,
                                       label="Detail Level (Slide me)", visible=True)