More app.py fixes: replace bare exit() calls with raised RuntimeErrors on unsupported platforms, fix the file paths returned by process_url, and clean up comment formatting.

This commit is contained in:
Robert
2024-05-11 22:25:29 -07:00
parent 143dbad761
commit bb1acc42b9
4 changed files with 300 additions and 56 deletions

BIN
.gitignore vendored

Binary file not shown.

167
HF/app.py
View File

@@ -39,7 +39,7 @@ import yt_dlp
# 2. Usage of/Hardcoding HF_TOKEN as token for API calls
# 3. Usage of HuggingFace for Inference
# 4. Other stuff I can't remember. Will eventually do a diff and document them.
#
#
####
@@ -63,10 +63,10 @@ import yt_dlp
# llama.cpp)/`ooba` (oobabooga/text-gen-webui)/`kobold` (kobold.cpp)/`tabby` (Tabbyapi)) API:** python summarize.py
# -v https://www.youtube.com/watch?v=4nd1CDZP21s -api <your choice of API>` - Make sure to put your API key into
# `config.txt` under the appropriate API variable
#
#
# Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:**
# python summarize.py ./local/file_on_your/system --api_name <API_name>`
#
#
# Run it as a WebApp**
# python summarize.py -gui` - This requires you to either stuff your API keys into the `config.txt` file, or pass them into the app every time you want to use it.
# Can be helpful for setting up a shared instance, but not wanting people to perform inference on your server.
@@ -120,7 +120,7 @@ output_path = config.get('Paths', 'output_path', fallback='results')
processing_choice = config.get('Processing', 'processing_choice', fallback='cpu')
# Log file
#logging.basicConfig(filename='debug-runtime.log', encoding='utf-8', level=logging.DEBUG)
# logging.basicConfig(filename='debug-runtime.log', encoding='utf-8', level=logging.DEBUG)
#
#
@@ -148,8 +148,8 @@ print(r"""
| | | | / / | | | || |/\| |
| | | |____ / / | |/ / \ /\ / _
\_/ \_____//_/ |___/ \/ \/ (_)
_ _
| | | |
| |_ ___ ___ | | ___ _ __ __ _
@@ -168,8 +168,8 @@ print(r"""
####################################################################################################################################
# System Checks
#
#
#
#
# Perform Platform Check
userOS = ""
@@ -291,13 +291,13 @@ def download_ffmpeg():
#
#
#
####################################################################################################################################
####################################################################################################################################
# Processing Paths and local file handling
#
#
#
def read_paths_from_file(file_path):
@@ -374,7 +374,7 @@ def process_url(input_path, num_speakers=2, whisper_model="small.en", custom_pro
return json_data, summary_file_path, json_file_path, summary_file_path
else:
return json_data, "Summary not available.", json_file_path, None
return json_data, "Summary not available.", json_file_path, "Summary not available."
else:
return None, "No results found.", None, None
@@ -508,8 +508,8 @@ def download_video(video_url, download_path, info_dict, download_video_flag):
]
subprocess.run(ffmpeg_command, check=True)
else:
logging.error("You shouldn't be here...")
exit()
logging.error("ffmpeg: Unsupported operating system for video download and merging.")
raise RuntimeError("ffmpeg: Unsupported operating system for video download and merging.")
os.remove(video_file_path)
os.remove(audio_file_path)
@@ -529,7 +529,7 @@ def download_video(video_url, download_path, info_dict, download_video_flag):
# https://www.gyan.dev/ffmpeg/builds/
#
#os.system(r'.\Bin\ffmpeg.exe -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"')
# os.system(r'.\Bin\ffmpeg.exe -ss 00:00:00 -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{out_path}"')
def convert_to_wav(video_file_path, offset=0):
print("Starting conversion process of .m4a to .WAV")
out_path = os.path.splitext(video_file_path)[0] + ".wav"
@@ -539,7 +539,8 @@ def convert_to_wav(video_file_path, offset=0):
logging.debug("ffmpeg being ran on windows")
if sys.platform.startswith('win'):
ffmpeg_cmd = ".\\Bin\\ffmpeg.exe"
ffmpeg_cmd = "..\\Bin\\ffmpeg.exe"
logging.debug(f"ffmpeg_cmd: {ffmpeg_cmd}")
else:
ffmpeg_cmd = 'ffmpeg' # Assume 'ffmpeg' is in PATH for non-Windows systems
@@ -749,7 +750,7 @@ def speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='sm
####################################################################################################################################
#Summarizers
# Summarizers
#
#
@@ -1023,7 +1024,7 @@ def summarize_with_llama(api_url, file_path, token, custom_prompt):
logging.debug("API Response Data: %s", response_data)
if response.status_code == 200:
#if 'X' in response_data:
# if 'X' in response_data:
logging.debug(response_data)
summary = response_data['content'].strip()
logging.debug("llama: Summarization successful")
@@ -1236,28 +1237,11 @@ def process_text(api_key, text_file):
return "Notice:", message
def format_file_path(file_path):
# Helper function to check file existence and return an appropriate path or message
return file_path if file_path and os.path.exists(file_path) else None
def launch_ui(demo_mode=False):
def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter,
download_video):
try:
# Assuming 'main' is the function that handles the processing logic.
# Adjust parameters as needed based on your actual 'main' function implementation.
results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers,
whisper_model=whisper_model, offset=offset, vad_filter=vad_filter,
download_video_flag=download_video, custom_prompt=custom_prompt)
if results:
transcription_result = results[0]
json_data = transcription_result['transcription']
summary_file_path = transcription_result.get('summary', "Summary not available.")
json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json')
video_file_path = transcription_result.get('video_path', None)
return json_data, summary_file_path, json_file_path, summary_file_path, video_file_path
else:
return "No results found.", "No summary available.", None, None, None
except Exception as e:
return str(e), "Error processing the request.", None, None, None
inputs = [
gr.components.Textbox(label="URL", placeholder="Enter the video URL here"),
gr.components.Number(value=2, label="Number of Speakers"),
@@ -1275,8 +1259,90 @@ def launch_ui(demo_mode=False):
outputs = [
gr.components.Textbox(label="Transcription"),
gr.components.Textbox(label="Summary or Status Message"),
gr.components.File(label="Download Transcription as JSON", visible=lambda x: x is not None),
gr.components.File(label="Download Summary as Text", visible=lambda x: x is not None),
gr.components.File(label="Download Transcription as JSON", visible=lambda x: x != "File not available"),
gr.components.File(label="Download Summary as Text", visible=lambda x: x != "File not available"),
gr.components.File(label="Download Video", visible=lambda x: x is not None)
]
def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter,
download_video):
video_file_path = None
try:
results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers,
whisper_model=whisper_model, offset=offset, vad_filter=vad_filter,
download_video_flag=download_video, custom_prompt=custom_prompt)
if results:
transcription_result = results[0]
json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json')
summary_file_path = json_file_path.replace('.segments.json', '_summary.txt')
json_file_path = format_file_path(json_file_path)
summary_file_path = format_file_path(summary_file_path)
return transcription_result['transcription'], "Summary available", json_file_path, summary_file_path, video_file_path
else:
return "No results found.", "No summary available.", None, None
except Exception as e:
return str(e), "Error processing the request.", None, None
iface = gr.Interface(
fn=process_url,
inputs=inputs,
outputs=outputs,
title="Video Transcription and Summarization",
description="Submit a video URL for transcription and summarization. Ensure you input all necessary information including API keys."
)
iface.launch(share=False)
a = """def launch_ui(demo_mode=False):
def process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter,
download_video):
try:
results = main(url, api_name=api_name, api_key=api_key, num_speakers=num_speakers,
whisper_model=whisper_model, offset=offset, vad_filter=vad_filter,
download_video_flag=download_video, custom_prompt=custom_prompt)
if results:
transcription_result = results[0]
json_data = transcription_result['transcription']
json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json')
summary_file_path = transcription_result.get('summary', "Summary not available.")
video_file_path = transcription_result.get('video_path', None)
json_file_path = format_file_path(json_file_path)
summary_file_path = format_file_path(summary_file_path)
return json_data, "Summary available", json_file_path, summary_file_path, video_file_path
else:
return "No results found.", "No summary available.", None, None, None
except Exception as e:
return str(e), "Error processing the request.", None, None, None, None
inputs = [
gr.components.Textbox(label="URL", placeholder="Enter the video URL here"),
gr.components.Number(value=2, label="Number of Speakers"),
gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model"),
gr.components.Textbox(label="Custom Prompt",
placeholder="Q: As a professional summarizer, create a concise and comprehensive summary of the provided text.\nA: Here is a detailed, bulleted list of the key points made in the transcribed video and supporting arguments:",
lines=3),
gr.components.Number(value=0, label="Offset"),
gr.components.Dropdown(
choices=["huggingface", "openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"],
label="API Name"),
gr.components.Textbox(label="API Key", placeholder="Enter your API key here"),
gr.components.Checkbox(label="VAD Filter", value=False),
gr.components.Checkbox(label="Download Video", value=False)
]
outputs = [
gr.components.Textbox(label="Transcription"),
gr.components.Textbox(label="Summary or Status Message"),
gr.components.File(label="Download Transcription as JSON", visible=lambda x: x != "File not available"),
gr.components.File(label="Download Summary as Text", visible=lambda x: x != "File not available"),
gr.components.File(label="Download Video", visible=lambda x: x is not None)
]
@@ -1290,7 +1356,7 @@ def launch_ui(demo_mode=False):
)
iface.launch(share=False)
"""
#
#
@@ -1332,7 +1398,12 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
download_path = create_download_directory(info_dict['title'])
logging.debug("MAIN: Path created successfully")
logging.debug("MAIN: Downloading video from yt_dlp...")
video_path = download_video(path, download_path, info_dict, download_video_flag)
try:
video_path = download_video(path, download_path, info_dict, download_video_flag)
except RuntimeError as e:
logging.error(f"Error downloading video: {str(e)}")
#FIXME - figure something out for handling this situation....
continue
logging.debug("MAIN: Video downloaded successfully")
logging.debug("MAIN: Converting video file to WAV...")
audio_file = convert_to_wav(video_path, offset)
@@ -1436,7 +1507,7 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
logging.error(f"Error processing path: {path}")
logging.error(str(e))
end_time = time.monotonic()
#print("Total program execution time: " + timedelta(seconds=end_time - start_time))
# print("Total program execution time: " + timedelta(seconds=end_time - start_time))
return results
@@ -1455,7 +1526,9 @@ if __name__ == "__main__":
choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Log level (default: INFO)')
parser.add_argument('-ui', '--user_interface', action='store_true', help='Launch the Gradio user interface')
parser.add_argument('-demo', '--demo_mode', action='store_true', help='Enable demo mode')
#parser.add_argument('--log_file', action=str, help='Where to save logfile (non-default)')
parser.add_argument('-prompt', '--custom_prompt', type=str,
help='Pass in a custom prompt to be used in place of the existing one.(Probably should just modify the script itself...)')
# parser.add_argument('--log_file', action=str, help='Where to save logfile (non-default)')
args = parser.parse_args()
custom_prompt = args.custom_prompt
@@ -1467,9 +1540,9 @@ if __name__ == "__main__":
args.custom_prompt = "\n\nQ: As a professional summarizer, create a concise and comprehensive summary of the provided text.\nA: Here is a detailed, bulleted list of the key points made in the transcribed video and supporting arguments:"
print("No custom prompt defined, will use default")
print(f"Is CUDA available: {torch.cuda.is_available()}")
# print(f"Is CUDA available: {torch.cuda.is_available()}")
# True
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
# print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
# Tesla T4
# Since this is running in HF....
@@ -1491,7 +1564,7 @@ if __name__ == "__main__":
logging.info(f'Whisper model: {args.whisper_model}')
logging.info(f'Offset: {args.offset}')
logging.info(f'VAD filter: {args.vad_filter}')
logging.info(f'Log Level: {args.log_level}') #lol
logging.info(f'Log Level: {args.log_level}') # lol
if args.api_name and args.api_key:
logging.info(f'API: {args.api_name}')

View File

@@ -1,10 +1,11 @@
import os
from typing import List, Tuple, Optional
from openai import OpenAI
import tiktoken
from tqdm import tqdm
# script from: https://github.com/openai/openai-cookbook/blob/main/examples/Summarizing_long_documents.ipynb
# Open dataset
with open(".\\tldw-original-scripts\\Samples\\ai_wikipedia.txt", "r") as file:
@@ -14,15 +15,15 @@ with open(".\\tldw-original-scripts\\Samples\\ai_wikipedia.txt", "r") as file:
encoding = tiktoken.encoding_for_model('gpt-4-turbo')
print(len(encoding.encode(artificial_intelligence)))
# Call wrapper to OpenAI
client = OpenAI(api_key="")
def get_chat_completion(messages, model='gpt-4-turbo'):
    """Send a chat-completion request to the OpenAI API and return the reply text.

    Parameters:
    - messages: list of {"role": ..., "content": ...} dicts forming the conversation.
    - model (str, optional): OpenAI model name. Defaults to 'gpt-4-turbo'.

    Returns:
    - str: the content of the first choice in the API response.
    """
    # temperature=0 keeps the summaries as deterministic as the API allows.
    # NOTE: the diff artifact that passed model/messages/temperature twice
    # (a SyntaxError: keyword argument repeated) is removed here.
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0,
    )
    return response.choices[0].message.content
@@ -32,6 +33,7 @@ def tokenize(text: str) -> List[str]:
encoding = tiktoken.encoding_for_model('gpt-4-turbo')
return encoding.encode(text)
# This function chunks a text into smaller pieces based on a maximum token count and a delimiter
def chunk_on_delimiter(input_string: str,
max_tokens: int,
@@ -45,13 +47,181 @@ def chunk_on_delimiter(input_string: str,
combined_chunks = [f"{chunk}{delimiter}" for chunk in combined_chunks]
return combined_chunks
# This function combines text chunks into larger blocks without exceeding a specified token count.
# It returns the combined chunks, their original indices, and the number of dropped chunks due to overflow.
def combine_chunks_with_no_minimum(
        chunks: List[str],
        max_tokens: int,
        chunk_delimiter: str = "\n\n",
        header: Optional[str] = None,
        add_ellipsis_for_overflow: bool = False,
) -> Tuple[List[str], List[List[int]], int]:
    """Greedily pack `chunks` into combined blocks of at most `max_tokens` tokens each.

    Parameters:
    - chunks: the pre-split text chunks to combine.
    - max_tokens: token budget per combined block (measured via `tokenize`).
    - chunk_delimiter: string joined between chunks within a block.
    - header: optional text prepended to every combined block (e.g. a title).
    - add_ellipsis_for_overflow: if True, mark a dropped oversized chunk with "...".

    Returns:
    - (combined_blocks, indices_per_block, dropped_chunk_count) where
      indices_per_block[i] lists the original chunk indices folded into block i.

    NOTE: the diff artifact that declared `chunk_delimiter` and
    `add_ellipsis_for_overflow` twice (a SyntaxError) is removed, and the
    return annotation now matches the actual 3-tuple returned.
    """
    dropped_chunk_count = 0
    output = []  # list to hold the final combined chunks
    output_indices = []  # list to hold the indices of the final combined chunks
    candidate = (
        [] if header is None else [header]
    )  # list to hold the current combined chunk candidate
    candidate_indices = []
    for chunk_i, chunk in enumerate(chunks):
        chunk_with_header = [chunk] if header is None else [header, chunk]
        # A single chunk that already exceeds the budget can never be packed: drop it.
        if len(tokenize(chunk_delimiter.join(chunk_with_header))) > max_tokens:
            print(f"warning: chunk overflow")
            if (
                    add_ellipsis_for_overflow
                    and len(tokenize(chunk_delimiter.join(candidate + ["..."]))) <= max_tokens
            ):
                candidate.append("...")
            dropped_chunk_count += 1
            continue  # this case would break downstream assumptions
        # estimate token count with the current chunk added
        extended_candidate_token_count = len(tokenize(chunk_delimiter.join(candidate + [chunk])))
        # If the token count exceeds max_tokens, add the current candidate to output and start a new candidate
        if extended_candidate_token_count > max_tokens:
            output.append(chunk_delimiter.join(candidate))
            output_indices.append(candidate_indices)
            candidate = chunk_with_header  # re-initialize candidate
            candidate_indices = [chunk_i]
        # otherwise keep extending the candidate
        else:
            candidate.append(chunk)
            candidate_indices.append(chunk_i)
    # add the remaining candidate to output if it's not empty
    # (when a header is set, a lone header does not count as content)
    if (header is not None and len(candidate) > 1) or (header is None and len(candidate) > 0):
        output.append(chunk_delimiter.join(candidate))
        output_indices.append(candidate_indices)
    return output, output_indices, dropped_chunk_count
def summarize(text: str,
              detail: float = 0,
              model: str = 'gpt-4-turbo',
              additional_instructions: Optional[str] = None,
              minimum_chunk_size: Optional[int] = 500,
              chunk_delimiter: str = ".",
              summarize_recursively: bool = False,
              verbose: bool = False):
    """
    Summarizes a given text by splitting it into chunks, each of which is summarized individually.
    The level of detail in the summary can be adjusted, and the process can optionally be made recursive.

    Parameters:
    - text (str): The text to be summarized.
    - detail (float, optional): A value between 0 and 1 indicating the desired level of detail in the
      summary. 0 leads to a higher-level summary, and 1 results in a more detailed summary. Defaults to 0.
    - model (str, optional): The model to use for generating summaries. Defaults to 'gpt-4-turbo'.
    - additional_instructions (Optional[str], optional): Extra instructions appended to the system
      prompt to customize the summaries.
    - minimum_chunk_size (Optional[int], optional): The minimum size (in tokens) for text chunks.
      Defaults to 500.
    - chunk_delimiter (str, optional): The delimiter used to split the text into chunks. Defaults to ".".
    - summarize_recursively (bool, optional): If True, each chunk's summary is generated with the
      previous summaries supplied as context.
    - verbose (bool, optional): If True, prints detailed information about the chunking process.

    Returns:
    - str: The final compiled summary of the text.

    The function first determines the number of chunks by interpolating between a minimum and a maximum
    chunk count based on the `detail` parameter. It then splits the text into chunks and summarizes each
    chunk. If `summarize_recursively` is True, each summary is based on the previous summaries, adding
    more context to the summarization process. The function returns a compiled summary of all chunks.
    """
    # detail must be a valid interpolation factor in [0, 1]
    assert 0 <= detail <= 1

    # interpolate the number of chunks to get the specified level of detail
    # (detail=0 -> 1 chunk, detail=1 -> as many chunks as minimum_chunk_size allows)
    max_chunks = len(chunk_on_delimiter(text, minimum_chunk_size, chunk_delimiter))
    min_chunks = 1
    num_chunks = int(min_chunks + detail * (max_chunks - min_chunks))

    # adjust chunk_size based on the interpolated number of chunks
    document_length = len(tokenize(text))
    chunk_size = max(minimum_chunk_size, document_length // num_chunks)
    text_chunks = chunk_on_delimiter(text, chunk_size, chunk_delimiter)
    if verbose:
        print(f"Splitting the text into {len(text_chunks)} chunks to be summarized.")
        print(f"Chunk lengths are {[len(tokenize(x)) for x in text_chunks]}")

    # set system message; optional extra instructions are appended verbatim
    system_message_content = "Rewrite this text in summarized form."
    if additional_instructions is not None:
        system_message_content += f"\n\n{additional_instructions}"

    accumulated_summaries = []
    for chunk in tqdm(text_chunks):
        if summarize_recursively and accumulated_summaries:
            # Creating a structured prompt for recursive summarization:
            # prior summaries are prepended so the model keeps running context
            accumulated_summaries_string = '\n\n'.join(accumulated_summaries)
            user_message_content = f"Previous summaries:\n\n{accumulated_summaries_string}\n\nText to summarize next:\n\n{chunk}"
        else:
            # Directly passing the chunk for summarization without recursive context
            user_message_content = chunk

        # Constructing messages based on whether recursive summarization is applied
        messages = [
            {"role": "system", "content": system_message_content},
            {"role": "user", "content": user_message_content}
        ]

        # One API call per chunk via the module-level wrapper
        response = get_chat_completion(messages, model=model)
        accumulated_summaries.append(response)

    # Compile final summary from partial summaries
    final_summary = '\n\n'.join(accumulated_summaries)

    return final_summary
# Demo driver: produce summaries of the loaded article at increasing detail
# levels, then demonstrate the additional-instructions and recursive modes.
# Fixes: the last two calls referenced `artificial_intelligence_wikipedia_text`
# (the upstream cookbook's variable name), which is never defined in this
# adapted script — the text is loaded into `artificial_intelligence` above.
# Also, the summary-lengths expression was a bare statement whose result was
# discarded; it is now printed.

# Summary at 0 detail
summary_with_detail_0 = summarize(artificial_intelligence, detail=0, verbose=True)
# Summary at 0.25 detail
summary_with_detail_pt25 = summarize(artificial_intelligence, detail=0.25, verbose=True)
# Summary at 0.5 detail
summary_with_detail_pt5 = summarize(artificial_intelligence, detail=0.5, verbose=True)
# Summary at 0.75 detail
summary_with_detail_pt75 = summarize(artificial_intelligence, detail=0.75, verbose=True)
# Summary at 1 detail
summary_with_detail_1 = summarize(artificial_intelligence, detail=1, verbose=True)

# Lengths of summaries (in tokens), from least to most detailed:
print([len(tokenize(x)) for x in
       [summary_with_detail_0, summary_with_detail_pt25, summary_with_detail_pt5,
        summary_with_detail_pt75, summary_with_detail_1]])

# print 0 detail summary
print(summary_with_detail_0)
# print 0.25 detail summary
print(summary_with_detail_pt25)
# print 0.5 detail summary
print(summary_with_detail_pt5)
# print 0.75 detail summary
print(summary_with_detail_pt75)
# print 1.0 detail summary
print(summary_with_detail_1)

# Print summary using additional instructions:
summary_with_additional_instructions = summarize(artificial_intelligence, detail=0.1,
                                                 additional_instructions="Write in point form and focus on numerical data.")
print(summary_with_additional_instructions)

# Print summary using recursive summarization:
recursive_summary = summarize(artificial_intelligence, detail=0.1, summarize_recursively=True)
print(recursive_summary)

View File

@@ -500,8 +500,8 @@ def download_video(video_url, download_path, info_dict, download_video_flag):
]
subprocess.run(ffmpeg_command, check=True)
else:
logging.error("You shouldn't be here...")
exit()
logging.error("ffmpeg: Unsupported operating system for video download and merging.")
raise RuntimeError("ffmpeg: Unsupported operating system for video download and merging.")
os.remove(video_file_path)
os.remove(audio_file_path)
@@ -533,6 +533,7 @@ def convert_to_wav(video_file_path, offset=0):
if sys.platform.startswith('win'):
ffmpeg_cmd = ".\\Bin\\ffmpeg.exe"
logging.debug(f"ffmpeg_cmd: {ffmpeg_cmd}")
else:
ffmpeg_cmd = 'ffmpeg' # Assume 'ffmpeg' is in PATH for non-Windows systems