More app.py fixes: replace bare exit() calls with raised RuntimeErrors on unsupported platforms, fix the file paths returned by process_url, and clean up comment formatting.

This commit is contained in:
Robert
2024-05-11 22:25:29 -07:00
parent 143dbad761
commit bb1acc42b9
4 changed files with 300 additions and 56 deletions

BIN
.gitignore vendored

Binary file not shown.

167
HF/app.py
View File

@@ -39,7 +39,7 @@ import yt_dlp
# 2. Usage of/Hardcoding HF_TOKEN as token for API calls
# 3. Usage of HuggingFace for Inference
# 4. Other stuff I can't remember. Will eventually do a diff and document them.
#
#
####
@@ -63,10 +63,10 @@ import yt_dlp
# llama.cpp)/`ooba` (oobabooga/text-gen-webui)/`kobold` (kobold.cpp)/`tabby` (Tabbyapi)) API:** python summarize.py
# -v https://www.youtube.com/watch?v=4nd1CDZP21s -api <your choice of API>` - Make sure to put your API key into
# `config.txt` under the appropriate API variable
#
#
# Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:**
# python summarize.py ./local/file_on_your/system --api_name <API_name>`
#
#
# Run it as a WebApp**
# python summarize.py -gui` - This requires you to either stuff your API keys into the `config.txt` file, or pass them into the app every time you want to use it.
# Can be helpful for setting up a shared instance, but not wanting people to perform inference on your server.
@@ -120,7 +120,7 @@ output_path = config.get('Paths', 'output_path', fallback='results')
processing_choice = config.get('Processing', 'processing_choice', fallback='cpu')
# Log file
#logging.basicConfig(filename='debug-runtime.log', encoding='utf-8', level=logging.DEBUG)
# logging.basicConfig(filename='debug-runtime.log', encoding='utf-8', level=logging.DEBUG)
#
#
@@ -148,8 +148,8 @@ print(r"""
| | | | / / | | | || |/\| |
| | | |____ / / | |/ / \ /\ / _
\_/ \_____//_/ |___/ \/ \/ (_)
_ _
| | | |
| |_ ___ ___ | | ___ _ __ __ _
@@ -168,8 +168,8 @@ print(r"""
####################################################################################################################################
# System Checks
#
#
#
#
# Perform Platform Check
userOS = ""
@@ -291,13 +291,13 @@ def download_ffmpeg():
#
#
#
####################################################################################################################################
####################################################################################################################################
# Processing Paths and local file handling
#
#
#
def read_paths_from_file(file_path):
@@ -374,7 +374,7 @@ def process_url(input_path, num_speakers=2, whisper_model="small.en", custom_pro
return json_data, summary_file_path, json_file_path, summary_file_path
else:
return json_data, "Summary not available.", json_file_path, None
return json_data, "Summary not available.", json_file_path, "Summary not available."
else:
return None, "No results found.", None, None
@@ -508,8 +508,8 @@ def download_video(video_url, download_path, info_dict, download_video_flag):
]
subprocess.run(ffmpeg_command, check=True)
else:
logging.error("You shouldn't be here...")
exit()
logging.error("ffmpeg: Unsupported operating system for video download and merging.")
raise RuntimeError("ffmpeg: Unsupported operating system for video download and merging.")
os.remove(video_file_path)
os.remove(audio_file_path)
@@ -529,7 +529,7 @@ def download_video(video_url, download_path, info_dict, download_video_flag):
# https://www.gyan.dev/ffmpeg/builds/
#
#os.system(r'.\Bin\ffmpeg.exe -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"')
# os.system(r'.\Bin\ffmpeg.exe -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"')
def convert_to_wav(video_file_path, offset=0):
print("Starting conversion process of .m4a to .WAV")
out_path = os.path.splitext(video_file_path)[0] + ".wav"
@@ -539,7 +539,8 @@ def convert_to_wav(video_file_path, offset=0):
logging.debug("ffmpeg being ran on windows")
if sys.platform.startswith('win'):
ffmpeg_cmd = ".\\Bin\\ffmpeg.exe"
ffmpeg_cmd = "..\\Bin\\ffmpeg.exe"
logging.debug(f"ffmpeg_cmd: {ffmpeg_cmd}")
else:
ffmpeg_cmd = 'ffmpeg' # Assume 'ffmpeg' is in PATH for non-Windows systems
@@ -749,7 +750,7 @@ def speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='sm
####################################################################################################################################
#Summarizers
# Summarizers
#
#
@@ -1023,7 +1024,7 @@ def summarize_with_llama(api_url, file_path, token, custom_prompt):
logging.debug("API Response Data: %s", response_data)
if response.status_code == 200:
#if 'X' in response_data:
# if 'X' in response_data:
logging.debug(response_data)
summary = response_data['content'].strip()
logging.debug("llama: Summarization successful")
@@ -1236,28 +1237,11 @@ def process_text(api_key, text_file):
return "Notice:", message
def format_file_path(file_path):
# Helper function to check file existence and return an appropriate path or message
return file_path if file_path and os.path.exists(file_path) else None
def launch_ui(demo_mode=False):
def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter,
download_video):
try:
# Assuming 'main' is the function that handles the processing logic.
# Adjust parameters as needed based on your actual 'main' function implementation.
results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers,
whisper_model=whisper_model, offset=offset, vad_filter=vad_filter,
download_video_flag=download_video, custom_prompt=custom_prompt)
if results:
transcription_result = results[0]
json_data = transcription_result['transcription']
summary_file_path = transcription_result.get('summary', "Summary not available.")
json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json')
video_file_path = transcription_result.get('video_path', None)
return json_data, summary_file_path, json_file_path, summary_file_path, video_file_path
else:
return "No results found.", "No summary available.", None, None, None
except Exception as e:
return str(e), "Error processing the request.", None, None, None
inputs = [
gr.components.Textbox(label="URL", placeholder="Enter the video URL here"),
gr.components.Number(value=2, label="Number of Speakers"),
@@ -1275,8 +1259,90 @@ def launch_ui(demo_mode=False):
outputs = [
gr.components.Textbox(label="Transcription"),
gr.components.Textbox(label="Summary or Status Message"),
gr.components.File(label="Download Transcription as JSON", visible=lambda x: x is not None),
gr.components.File(label="Download Summary as Text", visible=lambda x: x is not None),
gr.components.File(label="Download Transcription as JSON", visible=lambda x: x != "File not available"),
gr.components.File(label="Download Summary as Text", visible=lambda x: x != "File not available"),
gr.components.File(label="Download Video", visible=lambda x: x is not None)
]
def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter,
download_video):
video_file_path = None
try:
results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers,
whisper_model=whisper_model, offset=offset, vad_filter=vad_filter,
download_video_flag=download_video, custom_prompt=custom_prompt)
if results:
transcription_result = results[0]
json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json')
summary_file_path = json_file_path.replace('.segments.json', '_summary.txt')
json_file_path = format_file_path(json_file_path)
summary_file_path = format_file_path(summary_file_path)
return transcription_result['transcription'], "Summary available", json_file_path, summary_file_path, video_file_path
else:
return "No results found.", "No summary available.", None, None
except Exception as e:
return str(e), "Error processing the request.", None, None
iface = gr.Interface(
fn=process_url,
inputs=inputs,
outputs=outputs,
title="Video Transcription and Summarization",
description="Submit a video URL for transcription and summarization. Ensure you input all necessary information including API keys."
)
iface.launch(share=False)
a = """def launch_ui(demo_mode=False):
def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter,
download_video):
try:
results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers,
whisper_model=whisper_model, offset=offset, vad_filter=vad_filter,
download_video_flag=download_video, custom_prompt=custom_prompt)
if results:
transcription_result = results[0]
json_data = transcription_result['transcription']
json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json')
summary_file_path = transcription_result.get('summary', "Summary not available.")
video_file_path = transcription_result.get('video_path', None)
json_file_path = format_file_path(json_file_path)
summary_file_path = format_file_path(summary_file_path)
return json_data, "Summary available", json_file_path, summary_file_path, video_file_path
else:
return "No results found.", "No summary available.", None, None, None
except Exception as e:
return str(e), "Error processing the request.", None, None, None, None
inputs = [
gr.components.Textbox(label="URL", placeholder="Enter the video URL here"),
gr.components.Number(value=2, label="Number of Speakers"),
gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model"),
gr.components.Textbox(label="Custom Prompt",
placeholder="Q: As a professional summarizer, create a concise and comprehensive summary of the provided text.\nA: Here is a detailed, bulleted list of the key points made in the transcribed video and supporting arguments:",
lines=3),
gr.components.Number(value=0, label="Offset"),
gr.components.Dropdown(
choices=["huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"],
label="API Name"),
gr.components.Textbox(label="API Key", placeholder="Enter your API key here"),
gr.components.Checkbox(label="VAD Filter", value=False),
gr.components.Checkbox(label="Download Video", value=False)
]
outputs = [
gr.components.Textbox(label="Transcription"),
gr.components.Textbox(label="Summary or Status Message"),
gr.components.File(label="Download Transcription as JSON", visible=lambda x: x != "File not available"),
gr.components.File(label="Download Summary as Text", visible=lambda x: x != "File not available"),
gr.components.File(label="Download Video", visible=lambda x: x is not None)
]
@@ -1290,7 +1356,7 @@ def launch_ui(demo_mode=False):
)
iface.launch(share=False)
"""
#
#
@@ -1332,7 +1398,12 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
download_path = create_download_directory(info_dict['title'])
logging.debug("MAIN: Path created successfully")
logging.debug("MAIN: Downloading video from yt_dlp...")
video_path = download_video(path, download_path, info_dict, download_video_flag)
try:
video_path = download_video(path, download_path, info_dict, download_video_flag)
except RuntimeError as e:
logging.error(f"Error downloading video: {str(e)}")
#FIXME - figure something out for handling this situation....
continue
logging.debug("MAIN: Video downloaded successfully")
logging.debug("MAIN: Converting video file to WAV...")
audio_file = convert_to_wav(video_path, offset)
@@ -1436,7 +1507,7 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
logging.error(f"Error processing path: {path}")
logging.error(str(e))
end_time = time.monotonic()
#print("Total program execution time: " + timedelta(seconds=end_time - start_time))
# print("Total program execution time: " + timedelta(seconds=end_time - start_time))
return results
@@ -1455,7 +1526,9 @@ if __name__ == "__main__":
choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Log level (default: INFO)')
parser.add_argument('-ui', '--user_interface', action='store_true', help='Launch the Gradio user interface')
parser.add_argument('-demo', '--demo_mode', action='store_true', help='Enable demo mode')
#parser.add_argument('--log_file', action=str, help='Where to save logfile (non-default)')
parser.add_argument('-prompt', '--custom_prompt', type=str,
help='Pass in a custom prompt to be used in place of the existing one.(Probably should just modify the script itself...)')
# parser.add_argument('--log_file', action=str, help='Where to save logfile (non-default)')
args = parser.parse_args()
custom_prompt = args.custom_prompt
@@ -1467,9 +1540,9 @@ if __name__ == "__main__":
args.custom_prompt = "\n\nQ: As a professional summarizer, create a concise and comprehensive summary of the provided text.\nA: Here is a detailed, bulleted list of the key points made in the transcribed video and supporting arguments:"
print("No custom prompt defined, will use default")
print(f"Is CUDA available: {torch.cuda.is_available()}")
# print(f"Is CUDA available: {torch.cuda.is_available()}")
# True
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
# print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
# Tesla T4
# Since this is running in HF....
@@ -1491,7 +1564,7 @@ if __name__ == "__main__":
logging.info(f'Whisper model: {args.whisper_model}')
logging.info(f'Offset: {args.offset}')
logging.info(f'VAD filter: {args.vad_filter}')
logging.info(f'Log Level: {args.log_level}') #lol
logging.info(f'Log Level: {args.log_level}') # lol
if args.api_name and args.api_key:
logging.info(f'API: {args.api_name}')

View File

@@ -1,10 +1,11 @@
import os
from typing import List, Tuple, Optional
from openai import OpenAI
import tiktoken
from tqdm import tqdm
# script from: https://github.com/openai/openai-cookbook/blob/main/examples/Summarizing_long_documents.ipynb
# Open dataset
with open(".\\tldw-original-scripts\\Samples\\ai_wikipedia.txt", "r") as file:
@@ -14,15 +15,15 @@ with open(".\\tldw-original-scripts\\Samples\\ai_wikipedia.txt", "r") as file:
encoding = tiktoken.encoding_for_model('gpt-4-turbo')
print(len(encoding.encode(artificial_intelligence)))
# Call wrapper to OpenAI
client = OpenAI(api_key="")
def get_chat_completion(messages, model='gpt-4-turbo'):
    """Send a chat-completion request to the OpenAI API and return the reply text.

    Parameters:
    - messages: list of {"role": ..., "content": ...} dicts forming the conversation.
    - model (str, optional): OpenAI model name. Defaults to 'gpt-4-turbo'.

    Returns:
    - str: the content of the first choice in the API response.
    """
    # temperature=0 keeps the summaries as deterministic as the API allows.
    # NOTE: the diff artifact that passed model/messages/temperature twice
    # (a SyntaxError: keyword argument repeated) is removed here.
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0,
    )
    return response.choices[0].message.content
@@ -32,6 +33,7 @@ def tokenize(text: str) -> List[str]:
encoding = tiktoken.encoding_for_model('gpt-4-turbo')
return encoding.encode(text)
# This function chunks a text into smaller pieces based on a maximum token count and a delimiter
def chunk_on_delimiter(input_string: str,
max_tokens: int,
@@ -45,13 +47,181 @@ def chunk_on_delimiter(input_string: str,
combined_chunks = [f"{chunk}{delimiter}" for chunk in combined_chunks]
return combined_chunks
# This function combines text chunks into larger blocks without exceeding a specified token count.
# It returns the combined chunks, their original indices, and the number of dropped chunks due to overflow.
def combine_chunks_with_no_minimum(
        chunks: List[str],
        max_tokens: int,
        chunk_delimiter: str = "\n\n",
        header: Optional[str] = None,
        add_ellipsis_for_overflow: bool = False,
) -> Tuple[List[str], List[List[int]], int]:
    """Greedily pack `chunks` into combined blocks of at most `max_tokens` tokens each.

    Parameters:
    - chunks: the pre-split text chunks to combine.
    - max_tokens: token budget per combined block (measured via `tokenize`).
    - chunk_delimiter: string joined between chunks within a block.
    - header: optional text prepended to every combined block (e.g. a title).
    - add_ellipsis_for_overflow: if True, mark a dropped oversized chunk with "...".

    Returns:
    - (combined_blocks, indices_per_block, dropped_chunk_count) where
      indices_per_block[i] lists the original chunk indices folded into block i.

    NOTE: the diff artifact that declared `chunk_delimiter` and
    `add_ellipsis_for_overflow` twice (a SyntaxError) is removed, and the
    return annotation now matches the actual 3-tuple returned.
    """
    dropped_chunk_count = 0
    output = []  # list to hold the final combined chunks
    output_indices = []  # list to hold the indices of the final combined chunks
    candidate = (
        [] if header is None else [header]
    )  # list to hold the current combined chunk candidate
    candidate_indices = []
    for chunk_i, chunk in enumerate(chunks):
        chunk_with_header = [chunk] if header is None else [header, chunk]
        # A single chunk that already exceeds the budget can never be packed: drop it.
        if len(tokenize(chunk_delimiter.join(chunk_with_header))) > max_tokens:
            print(f"warning: chunk overflow")
            if (
                    add_ellipsis_for_overflow
                    and len(tokenize(chunk_delimiter.join(candidate + ["..."]))) <= max_tokens
            ):
                candidate.append("...")
            dropped_chunk_count += 1
            continue  # this case would break downstream assumptions
        # estimate token count with the current chunk added
        extended_candidate_token_count = len(tokenize(chunk_delimiter.join(candidate + [chunk])))
        # If the token count exceeds max_tokens, add the current candidate to output and start a new candidate
        if extended_candidate_token_count > max_tokens:
            output.append(chunk_delimiter.join(candidate))
            output_indices.append(candidate_indices)
            candidate = chunk_with_header  # re-initialize candidate
            candidate_indices = [chunk_i]
        # otherwise keep extending the candidate
        else:
            candidate.append(chunk)
            candidate_indices.append(chunk_i)
    # add the remaining candidate to output if it's not empty
    # (when a header is set, a lone header does not count as content)
    if (header is not None and len(candidate) > 1) or (header is None and len(candidate) > 0):
        output.append(chunk_delimiter.join(candidate))
        output_indices.append(candidate_indices)
    return output, output_indices, dropped_chunk_count
def summarize(text: str,
              detail: float = 0,
              model: str = 'gpt-4-turbo',
              additional_instructions: Optional[str] = None,
              minimum_chunk_size: Optional[int] = 500,
              chunk_delimiter: str = ".",
              summarize_recursively: bool = False,
              verbose: bool = False):
    """
    Summarizes a given text by splitting it into chunks, each of which is summarized individually.
    The level of detail in the summary can be adjusted, and the process can optionally be made recursive.

    Parameters:
    - text (str): The text to be summarized.
    - detail (float, optional): A value between 0 and 1 indicating the desired level of detail in the
      summary. 0 leads to a higher-level summary, and 1 results in a more detailed summary. Defaults to 0.
    - model (str, optional): The model to use for generating summaries. Defaults to 'gpt-4-turbo'.
    - additional_instructions (Optional[str], optional): Extra instructions appended to the system
      prompt to customize the summaries.
    - minimum_chunk_size (Optional[int], optional): The minimum size (in tokens) for text chunks.
      Defaults to 500.
    - chunk_delimiter (str, optional): The delimiter used to split the text into chunks. Defaults to ".".
    - summarize_recursively (bool, optional): If True, each chunk's summary is generated with the
      previous summaries supplied as context.
    - verbose (bool, optional): If True, prints detailed information about the chunking process.

    Returns:
    - str: The final compiled summary of the text.

    The function first determines the number of chunks by interpolating between a minimum and a maximum
    chunk count based on the `detail` parameter. It then splits the text into chunks and summarizes each
    chunk. If `summarize_recursively` is True, each summary is based on the previous summaries, adding
    more context to the summarization process. The function returns a compiled summary of all chunks.
    """
    # detail must be a valid interpolation factor in [0, 1]
    assert 0 <= detail <= 1

    # interpolate the number of chunks to get the specified level of detail
    # (detail=0 -> 1 chunk, detail=1 -> as many chunks as minimum_chunk_size allows)
    max_chunks = len(chunk_on_delimiter(text, minimum_chunk_size, chunk_delimiter))
    min_chunks = 1
    num_chunks = int(min_chunks + detail * (max_chunks - min_chunks))

    # adjust chunk_size based on the interpolated number of chunks
    document_length = len(tokenize(text))
    chunk_size = max(minimum_chunk_size, document_length // num_chunks)
    text_chunks = chunk_on_delimiter(text, chunk_size, chunk_delimiter)
    if verbose:
        print(f"Splitting the text into {len(text_chunks)} chunks to be summarized.")
        print(f"Chunk lengths are {[len(tokenize(x)) for x in text_chunks]}")

    # set system message; optional extra instructions are appended verbatim
    system_message_content = "Rewrite this text in summarized form."
    if additional_instructions is not None:
        system_message_content += f"\n\n{additional_instructions}"

    accumulated_summaries = []
    for chunk in tqdm(text_chunks):
        if summarize_recursively and accumulated_summaries:
            # Creating a structured prompt for recursive summarization:
            # prior summaries are prepended so the model keeps running context
            accumulated_summaries_string = '\n\n'.join(accumulated_summaries)
            user_message_content = f"Previous summaries:\n\n{accumulated_summaries_string}\n\nText to summarize next:\n\n{chunk}"
        else:
            # Directly passing the chunk for summarization without recursive context
            user_message_content = chunk

        # Constructing messages based on whether recursive summarization is applied
        messages = [
            {"role": "system", "content": system_message_content},
            {"role": "user", "content": user_message_content}
        ]

        # One API call per chunk via the module-level wrapper
        response = get_chat_completion(messages, model=model)
        accumulated_summaries.append(response)

    # Compile final summary from partial summaries
    final_summary = '\n\n'.join(accumulated_summaries)

    return final_summary
# Demo driver: produce summaries of the loaded article at increasing detail
# levels, then demonstrate the additional-instructions and recursive modes.
# Fixes: the last two calls referenced `artificial_intelligence_wikipedia_text`
# (the upstream cookbook's variable name), which is never defined in this
# adapted script — the text is loaded into `artificial_intelligence` above.
# Also, the summary-lengths expression was a bare statement whose result was
# discarded; it is now printed.

# Summary at 0 detail
summary_with_detail_0 = summarize(artificial_intelligence, detail=0, verbose=True)
# Summary at 0.25 detail
summary_with_detail_pt25 = summarize(artificial_intelligence, detail=0.25, verbose=True)
# Summary at 0.5 detail
summary_with_detail_pt5 = summarize(artificial_intelligence, detail=0.5, verbose=True)
# Summary at 0.75 detail
summary_with_detail_pt75 = summarize(artificial_intelligence, detail=0.75, verbose=True)
# Summary at 1 detail
summary_with_detail_1 = summarize(artificial_intelligence, detail=1, verbose=True)

# Lengths of summaries (in tokens), from least to most detailed:
print([len(tokenize(x)) for x in
       [summary_with_detail_0, summary_with_detail_pt25, summary_with_detail_pt5,
        summary_with_detail_pt75, summary_with_detail_1]])

# print 0 detail summary
print(summary_with_detail_0)
# print 0.25 detail summary
print(summary_with_detail_pt25)
# print 0.5 detail summary
print(summary_with_detail_pt5)
# print 0.75 detail summary
print(summary_with_detail_pt75)
# print 1.0 detail summary
print(summary_with_detail_1)

# Print summary using additional instructions:
summary_with_additional_instructions = summarize(artificial_intelligence, detail=0.1,
                                                 additional_instructions="Write in point form and focus on numerical data.")
print(summary_with_additional_instructions)

# Print summary using recursive summarization:
recursive_summary = summarize(artificial_intelligence, detail=0.1, summarize_recursively=True)
print(recursive_summary)

View File

@@ -500,8 +500,8 @@ def download_video(video_url, download_path, info_dict, download_video_flag):
]
subprocess.run(ffmpeg_command, check=True)
else:
logging.error("You shouldn't be here...")
exit()
logging.error("ffmpeg: Unsupported operating system for video download and merging.")
raise RuntimeError("ffmpeg: Unsupported operating system for video download and merging.")
os.remove(video_file_path)
os.remove(audio_file_path)
@@ -533,6 +533,7 @@ def convert_to_wav(video_file_path, offset=0):
if sys.platform.startswith('win'):
ffmpeg_cmd = ".\\Bin\\ffmpeg.exe"
logging.debug(f"ffmpeg_cmd: {ffmpeg_cmd}")
else:
ffmpeg_cmd = 'ffmpeg' # Assume 'ffmpeg' is in PATH for non-Windows systems