First stab at custom summarization prompts

2026-03-10 08:51:17 +00:00 · 2024-05-10 16:11:18 -07:00
parent 03106dd0fc
commit 0dbd739eca
2 changed files with 83 additions and 66 deletions
--- a/HF/app.py
+++ b/HF/app.py
@@ -45,23 +45,22 @@ import yt_dlp
 #
 #
 # Usage:
-#          Transcribe a single URL: 
-#                python diarize.py https://example.com/video.mp4
 #
-#          Transcribe a single URL and have the resulting transcription summarized: 
-#                python diarize.py https://example.com/video.mp4
+#   Download Audio only from URL -> Transcribe audio:
+#       python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s`
 #
-#          Transcribe a list of files:
-#               python diarize.py ./path/to/your/text_file.txt
+#   Download Audio+Video from URL -> Transcribe audio from Video:**
+#       python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s`
 #
-#          Transcribe a local file:
-#               python diarize.py /path/to/your/localfile.mp4
-#
-#          Transcribe a local file and have it summarized:
-#               python diarize.py ./input.mp4 --api_name openai --api_key <your_openai_api_key>
-#
-#          Transcribe a list of files and have them all summarized:
-#               python diarize.py path_to_your_text_file.txt --api_name <openai> --api_key <your_openai_api_key>
+#   Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` (llama.cpp)/`ooba` (oobabooga/text-gen-webui)/`kobold` (kobold.cpp)/`tabby` (Tabbyapi)) API:**
+#       python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s -api <your choice of API>` - Make sure to put your API key into `config.txt` under the appropriate API variable
+# 
+#   Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:**
+#       python summarize.py ./local/file_on_your/system --api_name <API_name>`
+# 
+#   Run it as a WebApp**
+#       python summarize.py -gui` - This requires you to either stuff your API keys into the `config.txt` file, or pass them into the app every time you want to use it.
+#           Can be helpful for setting up a shared instance, but not wanting people to perform inference on your server.
 #
 ###

@@ -350,7 +349,7 @@ def process_local_file(file_path):
 # Video Download/Handling
 #

-def process_url(input_path, num_speakers=2, whisper_model="small.en", offset=0, api_name=None, api_key=None, vad_filter=False, download_video_flag=False, demo_mode=True):
+def process_url(input_path, num_speakers=2, whisper_model="small.en", offset=0, api_name=None, api_key=None, vad_filter=False, download_video_flag=False,custom_prompt=None, demo_mode=True):
    if demo_mode:
        api_name = "huggingface"
        api_key = os.environ.get(HF_TOKEN)
@@ -793,7 +792,7 @@ def summarize_with_openai(api_key, file_path, model):
        }
        
        logging.debug("openai: Preparing data + prompt for submittal")
-        prompt_text = f"{text} \n\n\n\nPlease provide a detailed, bulleted list of the points made throughout the transcribed video and any supporting arguments made for said points"
+        openai_prompt = f"{text} \n\n\n\n{prompt_text}"
        data = {
            "model": model,
            "messages": [
@@ -803,7 +802,7 @@ def summarize_with_openai(api_key, file_path, model):
                },
                {
                    "role": "user",
-                    "content": prompt_text
+                    "content": openai_prompt
                }
            ],
            "max_tokens": 4096,  # Adjust tokens as needed
@@ -846,7 +845,7 @@ def summarize_with_claude(api_key, file_path, model):
        logging.debug("anthropic: Prepping data + prompt for submittal")
        user_message = {
            "role": "user",
-            "content": f"{text} \n\n\n\nPlease provide a detailed, bulleted list of the points made throughout the transcribed video and any supporting arguments made for said points"
+            "content": f"{text} \n\n\n\n{prompt_text}"
        }

        data = {
@@ -913,10 +912,10 @@ def summarize_with_cohere(api_key, file_path, model):
            'Authorization': f'Bearer {api_key}'
        }

-        prompt_text = f"{text} \n\nAs a professional summarizer, create a concise and comprehensive summary of the provided text."
+        cohere_prompt = f"{text} \n\n\n\n{prompt_text}"
        data = {
            "chat_history": [
-                {"role": "USER", "message": prompt_text}
+                {"role": "USER", "message": cohere_prompt}
            ],
            "message": "Please provide a summary.",
            "model": model,
@@ -964,12 +963,12 @@ def summarize_with_groq(api_key, file_path, model):
            'Content-Type': 'application/json'
        }

-        prompt_text = f"{text} \n\nAs a professional summarizer, create a concise and comprehensive summary of the provided text."
+        groq_prompt = f"{text} \n\n\n\n{prompt_text}"
        data = {
            "messages": [
                {
                    "role": "user",
-                    "content": prompt_text
+                    "content": groq_prompt
                }
            ],
            "model": model
@@ -1021,12 +1020,13 @@ def summarize_with_llama(api_url, file_path, token):
            headers['Authorization'] = f'Bearer {token}'


-        prompt_text = f"{text} \n\nAs a professional summarizer, create a concise and comprehensive summary of the provided text."
+        llama_prompt = f"{text} \n\n\n\n{prompt_text}"
+        logging.debug(f"llama: Complete prompt is: {llama_prompt}")
        data = {
-            "prompt": prompt_text
+            "prompt": llama_prompt
        }

-        logging.debug("llama: Submitting request to API endpoint")
+        #logging.debug(f"llama: Submitting request to API endpoint {llama_prompt}")
        print("llama: Submitting request to API endpoint")
        response = requests.post(api_url, headers=headers, json=data)
        response_data = response.json()
@@ -1064,13 +1064,13 @@ def summarize_with_kobold(api_url, file_path):
            'content-type': 'application/json',
        }
        # FIXME
-        prompt_text = f"{text} \n\nAs a professional summarizer, create a concise and comprehensive summary of the above text."
-        logging.debug(prompt_text)
+        kobold_prompt = f"{text} \n\n\n\n{prompt_text}"
+        logging.debug(kobold_prompt)
        # Values literally c/p from the api docs....
        data = {
            "max_context_length": 8096,
            "max_length": 4096,
-            "prompt": prompt_text,
+            "prompt": kobold_prompt,
        }

        logging.debug("kobold: Submitting request to API endpoint")
@@ -1114,9 +1114,9 @@ def summarize_with_oobabooga(api_url, file_path):
            'content-type': 'application/json',
        }

-        prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a french bakery baking cakes. It is a fun job. I have been baking cakes for ten years. I also bake lots of other baked goods, but cakes are my favorite."
-        # prompt_text += f"\n\n{text}"  # Uncomment this line if you want to include the text variable
-        prompt_text += "\n\nAs a professional summarizer, create a concise and comprehensive summary of the provided text."
+        #prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a french bakery baking cakes. It is a fun job. I have been baking cakes for ten years. I also bake lots of other baked goods, but cakes are my favorite."
+        #prompt_text += f"\n\n{text}"  # Uncomment this line if you want to include the text variable
+        ooba_prompt = f"{text}\n\n\n\n{prompt_text}"

        data =  {
            "mode": "chat",
@@ -1268,6 +1268,7 @@ def launch_ui(demo_mode=False):
        gr.components.Textbox(label="URL of video to be Transcribed/Summarized"),
        gr.components.Number(value=2, label="Number of Speakers (for Diarization)"),
        gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model (Can ignore this)"),
+        gr.components.Textbox(label="Custom Prompt", value="Please provide a detailed, bulleted list of the points made throughout the transcribed video and any supporting arguments made for said points", lines=3),
        gr.components.Number(value=0, label="Offset time to start transcribing from\n\n (helpful if you only want part of a video/lecture)")
    ]

@@ -1316,6 +1317,7 @@ def launch_ui(demo_mode=False):
 ####################################################################################################################################
 # Main()
 #
+
 def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, download_video_flag=False, demo_mode=False):
    if input_path is None and args.user_interface:
        return []
--- a/summarize.py
+++ b/summarize.py
@@ -36,23 +36,22 @@ import yt_dlp
 #
 #
 # Usage:
-#          Transcribe a single URL: 
-#                python diarize.py https://example.com/video.mp4
 #
-#          Transcribe a single URL and have the resulting transcription summarized: 
-#                python diarize.py https://example.com/video.mp4
+#   Download Audio only from URL -> Transcribe audio:
+#       python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s`
 #
-#          Transcribe a list of files:
-#               python diarize.py ./path/to/your/text_file.txt
+#   Download Audio+Video from URL -> Transcribe audio from Video:**
+#       python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s`
 #
-#          Transcribe a local file:
-#               python diarize.py /path/to/your/localfile.mp4
-#
-#          Transcribe a local file and have it summarized:
-#               python diarize.py ./input.mp4 --api_name openai --api_key <your_openai_api_key>
-#
-#          Transcribe a list of files and have them all summarized:
-#               python diarize.py path_to_your_text_file.txt --api_name <openai> --api_key <your_openai_api_key>
+#   Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` (llama.cpp)/`ooba` (oobabooga/text-gen-webui)/`kobold` (kobold.cpp)/`tabby` (Tabbyapi)) API:**
+#       python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s -api <your choice of API>` - Make sure to put your API key into `config.txt` under the appropriate API variable
+# 
+#   Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:**
+#       python summarize.py ./local/file_on_your/system --api_name <API_name>`
+# 
+#   Run it as a WebApp**
+#       python summarize.py -gui` - This requires you to either stuff your API keys into the `config.txt` file, or pass them into the app every time you want to use it.
+#           Can be helpful for setting up a shared instance, but not wanting people to perform inference on your server.
 #
 ###

@@ -341,7 +340,7 @@ def process_local_file(file_path):
 # Video Download/Handling
 #

-def process_url(input_path, num_speakers=2, whisper_model="small.en", offset=0, api_name=None, api_key=None, vad_filter=False, download_video_flag=False, demo_mode=False):
+def process_url(input_path, num_speakers=2, whisper_model="small.en", custom_prompt=None, offset=0, api_name=None, api_key=None, vad_filter=False, download_video_flag=False, demo_mode=False):
    if demo_mode:
 #        api_name = "<demo_mode_api>"
 #        api_key = "<demo_mode_key>"
@@ -782,7 +781,7 @@ def summarize_with_openai(api_key, file_path, model):
        }
        
        logging.debug("openai: Preparing data + prompt for submittal")
-        prompt_text = f"{text} \n\n\n\nPlease provide a detailed, bulleted list of the points made throughout the transcribed video and any supporting arguments made for said points"
+        openai_prompt = f"{text} \n\n\n\n{custom_prompt}"
        data = {
            "model": model,
            "messages": [
@@ -792,7 +791,7 @@ def summarize_with_openai(api_key, file_path, model):
                },
                {
                    "role": "user",
-                    "content": prompt_text
+                    "content": openai_prompt
                }
            ],
            "max_tokens": 4096,  # Adjust tokens as needed
@@ -831,11 +830,12 @@ def summarize_with_claude(api_key, file_path, model):
            'anthropic-version': '2023-06-01',
            'Content-Type': 'application/json'
        }
-        
-        logging.debug("anthropic: Prepping data + prompt for submittal")
+
+        anthropic_prompt = custom_prompt
+        logging.debug("anthropic: Prompt is {anthropic_prompt}")
        user_message = {
            "role": "user",
-            "content": f"{text} \n\n\n\nPlease provide a detailed, bulleted list of the points made throughout the transcribed video and any supporting arguments made for said points"
+            "content": f"{text} \n\n\n\n{anthropic_prompt}"
        }

        data = {
@@ -902,10 +902,12 @@ def summarize_with_cohere(api_key, file_path, model):
            'Authorization': f'Bearer {api_key}'
        }

-        prompt_text = f"{text} \n\nAs a professional summarizer, create a concise and comprehensive summary of the provided text."
+        cohere_prompt = f"{text} \n\n\n\n{custom_prompt}"
+        logging.debug("cohere: Prompt being sent is {cohere_prompt}")
+
        data = {
            "chat_history": [
-                {"role": "USER", "message": prompt_text}
+                {"role": "USER", "message": cohere_prompt}
            ],
            "message": "Please provide a summary.",
            "model": model,
@@ -953,12 +955,14 @@ def summarize_with_groq(api_key, file_path, model):
            'Content-Type': 'application/json'
        }

-        prompt_text = f"{text} \n\nAs a professional summarizer, create a concise and comprehensive summary of the provided text."
+        groq_prompt = f"{text} \n\n\n\n{custom_prompt}"
+        logging.debug("groq: Prompt being sent is {groq_prompt}")
+
        data = {
            "messages": [
                {
                    "role": "user",
-                    "content": prompt_text
+                    "content": groq_prompt
                }
            ],
            "model": model
@@ -1010,9 +1014,11 @@ def summarize_with_llama(api_url, file_path, token):
            headers['Authorization'] = f'Bearer {token}'


-        prompt_text = f"{text} \n\nAs a professional summarizer, create a concise and comprehensive summary of the provided text."
+        llama_prompt = f"{text} \n\n\n\n{custom_prompt}"
+        logging.debug("llama: Prompt being sent is {llama_prompt}")
+
        data = {
-            "prompt": prompt_text
+            "prompt": llama_prompt
        }

        logging.debug("llama: Submitting request to API endpoint")
@@ -1052,14 +1058,16 @@ def summarize_with_kobold(api_url, file_path):
            'accept': 'application/json',
            'content-type': 'application/json',
        }
+
+        kobold_prompt = f"{text} \n\n\n\n{custom_prompt}"
+        logging.debug("kobold: Prompt being sent is {kobold_prompt}")
+
        # FIXME
-        prompt_text = f"{text} \n\nAs a professional summarizer, create a concise and comprehensive summary of the above text."
-        logging.debug(prompt_text)
        # Values literally c/p from the api docs....
        data = {
            "max_context_length": 8096,
            "max_length": 4096,
-            "prompt": prompt_text,
+            "prompt": kobold_prompt,
        }

        logging.debug("kobold: Submitting request to API endpoint")
@@ -1103,14 +1111,15 @@ def summarize_with_oobabooga(api_url, file_path):
            'content-type': 'application/json',
        }

-        prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a french bakery baking cakes. It is a fun job. I have been baking cakes for ten years. I also bake lots of other baked goods, but cakes are my favorite."
+        # prompt_text = "I like to eat cake and bake cakes. I am a baker. I work in a french bakery baking cakes. It is a fun job. I have been baking cakes for ten years. I also bake lots of other baked goods, but cakes are my favorite."
        # prompt_text += f"\n\n{text}"  # Uncomment this line if you want to include the text variable
-        prompt_text += "\n\nAs a professional summarizer, create a concise and comprehensive summary of the provided text."
+        ooba_prompt = "{text}\n\n\n\n{custom_prompt}"
+        logging.debug("ooba: Prompt being sent is {ooba_prompt}")

        data =  {
            "mode": "chat",
            "character": "Example",
-            "messages": [{"role": "user", "content": prompt_text}]
+            "messages": [{"role": "user", "content": ooba_prompt}]
        }

        logging.debug("ooba: Submitting request to API endpoint")
@@ -1166,7 +1175,6 @@ def summarize_with_huggingface(api_key, file_path):
        logging.debug(f"huggingface: Segments: {segments}")
        text = ' '.join([segment['text'] for segment in segments])

-
 # API KEY ASSIGNMENT HERE
        api_key = huggingface_api_key
        print(f"huggingface: lets make sure the HF api key exists...\n\t {huggingface_api_key}" )
@@ -1176,6 +1184,10 @@ def summarize_with_huggingface(api_key, file_path):

        model = "microsoft/Phi-3-mini-128k-instruct"
        API_URL = f"https://api-inference.huggingface.co/models/{model}"
+
+
+        huggingface_prompt = "{text}\n\n\n\n{custom_prompt}"
+        logging.debug("huggingface: Prompt being sent is {huggingface_prompt}")
        data = {
            "inputs": text,
            "parameters": {"max_length": 512, "min_length": 100}  # You can adjust max_length and min_length as needed
@@ -1184,6 +1196,7 @@ def summarize_with_huggingface(api_key, file_path):
        print(f"huggingface: lets make sure the HF api key is the same..\n\t {huggingface_api_key}")

        logging.debug("huggingface: Submitting request...")
+
        response = requests.post(API_URL, headers=headers, json=data)
        
        if response.status_code == 200:
@@ -1217,6 +1230,7 @@ def launch_ui(demo_mode=False):
        gr.components.Textbox(label="URL"),
        gr.components.Number(value=2, label="Number of Speakers"),
        gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model"),
+        gr.components.Textbox(label="Custom Prompt", value="Please provide a detailed, bulleted list of the points made throughout the transcribed video and any supporting arguments made for said points", lines=3),
        gr.components.Number(value=0, label="Offset")
    ]

@@ -1245,7 +1259,8 @@ def launch_ui(demo_mode=False):
        theme="bethecloud/storj_theme"
    )

-    iface.launch(share=True)
+    #iface.launch(share=True)
+    iface.launch(share=False)

 #
 #
@@ -1260,7 +1275,7 @@ def launch_ui(demo_mode=False):
 ####################################################################################################################################
 # Main()
 #
-def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, download_video_flag=False):
+def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, download_video_flag=False, custom_prompt=None):
    if input_path is None and args.user_interface:
        return []
    start_time = time.monotonic()