From 72020e0316f7c04cf268ada9d47e230594ce6d34 Mon Sep 17 00:00:00 2001 From: Robert Date: Wed, 8 May 2024 21:39:12 -0700 Subject: [PATCH] Got ooba working. --- Tests/list_of_videos.txt | 3 + .../test_transcription_summarization.py | 0 config.txt | 2 +- diarize.py | 138 +++++++++--------- {params => tldw-scripts/params}/summary.json | 0 5 files changed, 72 insertions(+), 71 deletions(-) create mode 100644 Tests/list_of_videos.txt rename test_transcription_summarization.py => Tests/test_transcription_summarization.py (100%) rename {params => tldw-scripts/params}/summary.json (100%) diff --git a/Tests/list_of_videos.txt b/Tests/list_of_videos.txt new file mode 100644 index 0000000..95cf2af --- /dev/null +++ b/Tests/list_of_videos.txt @@ -0,0 +1,3 @@ +https://www.youtube.com/shorts/siPhZvKk0xE +https://www.youtube.com/shorts/oNM-YLoVMKI +https://www.youtube.com/shorts/quuWzw2Ih6M \ No newline at end of file diff --git a/test_transcription_summarization.py b/Tests/test_transcription_summarization.py similarity index 100% rename from test_transcription_summarization.py rename to Tests/test_transcription_summarization.py diff --git a/config.txt b/config.txt index 851f8b8..303df5c 100644 --- a/config.txt +++ b/config.txt @@ -14,7 +14,7 @@ kobold_api_IP = http://127.0.0.1:5001/api/v1/generate llama_api_key = llama_api_IP = http://127.0.0.1:8080/completion ooba_api_key = -ooba_api_IP = http://127.0.0.1:5000/api/v1/generate +ooba_api_IP = http://127.0.0.1:5000/v1/chat/completions [Paths] output_path = Results diff --git a/diarize.py b/diarize.py index 0a7bf80..6dd19fd 100644 --- a/diarize.py +++ b/diarize.py @@ -81,7 +81,7 @@ kobold_api_IP = config.get('Local-API', 'kobold_api_IP', fallback='http://127.0. kobold_api_key = config.get('Local-API', 'kobold_api_key', fallback='') llama_api_IP = config.get('Local-API', 'llama_api_IP', fallback='http://127.0.0.1:8080/v1/chat/completions') llama_api_key = config.get('Local-API', 'llama_api_key', fallback='') -ooba_api_IP = config.get('Local-API', 'ooba_api_IP', fallback='http://127.0.0.1:5000/api/v1/generate') +ooba_api_IP = config.get('Local-API', 'ooba_api_IP', fallback='http://127.0.0.1:5000/v1/chat/completions') ooba_api_key = config.get('Local-API', 'ooba_api_key', fallback='') # Retrieve output paths from the configuration file @@ -1041,60 +1041,47 @@ def summarize_with_kobold(api_url, file_path): # https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API def summarize_with_oobabooga(api_url, file_path): try: - logging.debug("oobabooga: Loading JSON data") + logging.debug("ooba: Loading JSON data") with open(file_path, 'r') as file: segments = json.load(file) - logging.debug(f"oobabooga: Extracting text from segments file") + logging.debug(f"ooba: Extracting text from segments file\n\n\n") text = extract_text_from_segments(segments) + logging.debug(f"ooba: Finished extracting text from segments file") headers = { 'accept': 'application/json', 'content-type': 'application/json', } - prompt_text = f"{text} \n\nAs a professional summarizer, create a concise and comprehensive summary of the provided text." - data = { - "prompt": prompt_text, - "max_new_tokens": 500, - "do_sample": True, - "temperature": 0.7, - "top_p": 0.9, - "typical_p": 1, - "repetition_penalty": 1.05, - "top_k": 40, - "min_length": 0, - "no_repeat_ngram_size": 0, - "num_beams": 1, - "penalty_alpha": 0, - "length_penalty": 1, - "early_stopping": False, - "seed": -1, - "add_bos_token": True, - "truncation_length": 2048, - "ban_eos_token": False, - "skip_special_tokens": True, - "stopping_strings": [] + prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a french bakery baking cakes. It is a fun job. I have been baking cakes for ten years. I also bake lots of other baked goods, but cakes are my favorite." + # prompt_text += f"\n\n{text}" # Uncomment this line if you want to include the text variable + prompt_text += "\n\nAs a professional summarizer, create a concise and comprehensive summary of the provided text." + + data = { + "mode": "chat", + "character": "Example", + "messages": [{"role": "user", "content": prompt_text}] } - logging.debug("oobabooga: Submitting request to API endpoint") - print("oobabooga: Submitting request to API endpoint") - response = requests.post(api_url, headers=headers, json=data) - response_data = response.json() - logging.debug("API Response Data: %s", response_data) + logging.debug("ooba: Submitting request to API endpoint") + print("ooba: Submitting request to API endpoint") + response = requests.post(api_url, headers=headers, json=data, verify=False) + logging.debug("ooba: API Response Data: %s", response) if response.status_code == 200: - summary = response_data['results'][0]['text'].strip() - logging.debug("oobabooga: Summarization successful") + response_data = response.json() + summary = response.json()['choices'][0]['message']['content'] + logging.debug("ooba: Summarization successful") print("Summarization successful.") return summary else: logging.error(f"oobabooga: API request failed with status code {response.status_code}: {response.text}") - return f"oobabooga: API request failed: {response.text}" + return f"ooba: API request failed with status code {response.status_code}: {response.text}" except Exception as e: - logging.error("oobabooga: Error in processing: %s", str(e)) - return f"oobabooga: Error occurred while processing summary with oobabooga: {str(e)}" + logging.error("ooba: Error in processing: %s", str(e)) + return f"ooba: Error occurred while processing summary with oobabooga: {str(e)}" @@ -1121,17 +1108,18 @@ def save_summary_to_file(summary, file_path): def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, download_video_flag=False): start_time = time.monotonic() - paths = [] if os.path.isfile(input_path) and input_path.endswith('.txt'): logging.debug("MAIN: User passed in a text file, processing text file...") paths = read_paths_from_file(input_path) + elif os.path.exists(input_path): + logging.debug("MAIN: Local file path detected") + paths = [input_path] + elif (info_dict := get_youtube(input_path)) and 'entries' in info_dict: + logging.debug("MAIN: YouTube playlist detected") + print("\n\nSorry, but playlists aren't currently supported. You can run the following command to generate a text file that you can then pass into this script though!" + """\n\n\tpython Get_Playlist_URLs.py \n\n\tThen,\n\n\tpython diarizer.py \n\n""") + return else: - info_dict = get_youtube(input_path) - if 'entries' in info_dict: # Check if the input is a playlist - logging.debug("MAIN: YouTube playlist detected") - print("\n\nSorry, but playlists aren't currently supported. You can run the following command to generate a text file that you can then pass into this script though!" + """\n\n\tpython Get_Playlist_URLs.py \n\n\tThen,\n\n\tpython diarizer.py \n\n""") - else: - paths = [input_path] + paths = [input_path] results = [] @@ -1141,26 +1129,15 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model= logging.debug("MAIN: URL Detected") info_dict = get_youtube(path) if info_dict: - if 'entries' in info_dict: # Check if the input is a playlist - logging.debug("MAIN: Creating path for video file...") - download_path = create_download_directory(info_dict['title']) - logging.debug("MAIN: Path created successfully") - logging.debug("MAIN: Downloading video from yt_dlp...") - video_path = download_video(path, download_path, info_dict, download_video_flag) - logging.debug("MAIN: Video downloaded successfully") - logging.debug("MAIN: Converting video file to WAV...") - audio_file = convert_to_wav(video_path, offset) - logging.debug("MAIN: Audio file converted succesfully") - else: - logging.debug("MAIN: Creating path for video file...") - download_path = create_download_directory(info_dict['title']) - logging.debug("MAIN: Path created successfully") - logging.debug("MAIN: Downloading video from yt_dlp...") - video_path = download_video(path, download_path, info_dict, download_video_flag) - logging.debug("MAIN: Video downloaded successfully") - logging.debug("MAIN: Converting video file to WAV...") - audio_file = convert_to_wav(video_path, offset) - logging.debug("MAIN: Audio file converted succesfully") + logging.debug("MAIN: Creating path for video file...") + download_path = create_download_directory(info_dict['title']) + logging.debug("MAIN: Path created successfully") + logging.debug("MAIN: Downloading video from yt_dlp...") + video_path = download_video(path, download_path, info_dict, download_video_flag) + logging.debug("MAIN: Video downloaded successfully") + logging.debug("MAIN: Converting video file to WAV...") + audio_file = convert_to_wav(video_path, offset) + logging.debug("MAIN: Audio file converted succesfully") else: if os.path.exists(path): logging.debug("MAIN: Local file path detected") @@ -1186,28 +1163,49 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model= json_file_path = audio_file.replace('.wav', '.segments.json') if api_name.lower() == 'openai': api_key = openai_api_key - summary = summarize_with_openai(api_key, json_file_path, openai_model) + try: + summary = summarize_with_openai(api_key, json_file_path, openai_model) + except requests.exceptions.ConnectionError: + r.status_code = "Connection: " elif api_name.lower() == 'anthropic': api_key = anthropic_api_key - summary = summarize_with_claude(api_key, json_file_path, anthropic_model) + try: + summary = summarize_with_claude(api_key, json_file_path, anthropic_model) + except requests.exceptions.ConnectionError: + r.status_code = "Connection: " elif api_name.lower() == 'cohere': api_key = cohere_api_key - summary = summarize_with_cohere(api_key, json_file_path, cohere_model) + try: + summary = summarize_with_cohere(api_key, json_file_path, cohere_model) + except requests.exceptions.ConnectionError: + r.status_code = "Connection: " elif api_name.lower() == 'groq': api_key = groq_api_key - summary = summarize_with_groq(api_key, json_file_path, groq_model) + try: + summary = summarize_with_groq(api_key, json_file_path, groq_model) + except requests.exceptions.ConnectionError: + r.status_code = "Connection: " elif api_name.lower() == 'llama': token = llama_api_key llama_ip = llama_api_IP - summary = summarize_with_llama(llama_ip, json_file_path, token) + try: + summary = summarize_with_llama(llama_ip, json_file_path, token) + except requests.exceptions.ConnectionError: + r.status_code = "Connection: " elif api_name.lower() == 'kobold': token = kobold_api_key kobold_ip = kobold_api_IP - summary = summarize_with_kobold(kobold_ip, json_file_path) - elif api_name.lower() == 'oobabooga': + try: + summary = summarize_with_kobold(kobold_ip, json_file_path) + except requests.exceptions.ConnectionError: + r.status_code = "Connection: " + elif api_name.lower() == 'ooba': token = ooba_api_key ooba_ip = ooba_api_IP - summary = summarize_with_oobabooga(oobabooga_ip, json_file_path) + try: + summary = summarize_with_oobabooga(ooba_ip, json_file_path) + except requests.exceptions.ConnectionError: + r.status_code = "Connection: " else: logging.warning(f"Unsupported API: {api_name}") summary = None diff --git a/params/summary.json b/tldw-scripts/params/summary.json similarity index 100% rename from params/summary.json rename to tldw-scripts/params/summary.json