update wip

This commit is contained in:
Mike
2023-08-14 20:47:55 -04:00
parent 52a5db52c0
commit 5d43febdce
2 changed files with 3 additions and 5 deletions

View File

@@ -20,6 +20,7 @@ This project is under active development and is not ready for production use.
|--------|-----|-------------|---|
|ufo.txt| Subcommittee on National Security, the Border, and Foreign Affairs Hearing | small.en | https://www.youtube.com/watch?v=KQ7Dw-739VY |
|aoe-grand-finale.txt| GRAND FINAL $10,000 AoE2 Event (The Resurgence) | medium.en | https://www.youtube.com/watch?v=jnoxjLJind4 |
|tim-dettmers.txt| AI on your phone? Tim Dettmers on quantization of neural networks — #41 | small.en | https://www.youtube.com/watch?v=auw2hXywrU8 |
## Creating a Dataset
@@ -29,7 +30,7 @@ Download the audio track:
```
pip install yt-dlp
yt-dlp -f "bestaudio[ext=m4a]" --extract-audio 'https://www.youtube.com/watch?v=<video>'
yt-dlp -f "bestaudio[ext=m4a]" --extract-audio 'https://www.youtube.com/watch?v=4dC_nRYIDZU'
```
### Transcode with ffmpeg

View File

@@ -6,8 +6,6 @@ import fire
import yaml
from copy import copy
task_prompt = "Write a {{language}} function {{Signature}} {{Input}} that returns {{Output}}"
def prepare(TEST_LANGUAGE, path, files):
out = {}
models = []
@@ -29,9 +27,8 @@ def prepare(TEST_LANGUAGE, path, files):
continue
testid = r['name']+'-'+r['language']
task = Template(task_prompt).render(**r)
if testid not in out:
out[testid] = { 'results': {}, 'task': task, 'language': r['language'] }
out[testid] = { 'results': {}, 'task': '', 'language': r['language'] }
check_summary = ''
passing_tests = ''