mirror of
https://github.com/jlengrand/tldw.git
synced 2026-03-10 08:51:17 +00:00
update wip
This commit is contained in:
@@ -20,6 +20,7 @@ This project is under active development and is not ready for production use.
|
||||
|--------|-----|-------------|---|
|
||||
|ufo.txt| Subcommittee on National Security, the Border, and Foreign Affairs Hearing | small.en | https://www.youtube.com/watch?v=KQ7Dw-739VY |
|
||||
|aoe-grand-finale.txt| GRAND FINAL $10,000 AoE2 Event (The Resurgence) | medium.en | https://www.youtube.com/watch?v=jnoxjLJind4 |
|
||||
|tim-dettmers | AI on your phone? Tim Dettmers on quantization of neural networks — #41 | small.en | https://www.youtube.com/watch?v=auw2hXywrU8 |
|
||||
|
||||
## Creating a Dataset
|
||||
|
||||
@@ -29,7 +30,7 @@ Download the audio track:
|
||||
|
||||
```
|
||||
pip install yt-dlp
|
||||
yt-dlp -f "bestaudio[ext=m4a]" --extract-audio 'https://www.youtube.com/watch?v=<video>'
|
||||
yt-dlp -f "bestaudio[ext=m4a]" --extract-audio 'https://www.youtube.com/watch?v=4dC_nRYIDZU'
|
||||
```
|
||||
|
||||
### Transcode with ffmpeg
|
||||
|
||||
@@ -6,8 +6,6 @@ import fire
|
||||
import yaml
|
||||
from copy import copy
|
||||
|
||||
task_prompt = "Write a {{language}} function {{Signature}} {{Input}} that returns {{Output}}"
|
||||
|
||||
def prepare(TEST_LANGUAGE, path, files):
|
||||
out = {}
|
||||
models = []
|
||||
@@ -29,9 +27,8 @@ def prepare(TEST_LANGUAGE, path, files):
|
||||
continue
|
||||
|
||||
testid = r['name']+'-'+r['language']
|
||||
task = Template(task_prompt).render(**r)
|
||||
if testid not in out:
|
||||
out[testid] = { 'results': {}, 'task': task, 'language': r['language'] }
|
||||
out[testid] = { 'results': {}, 'task': '', 'language': r['language'] }
|
||||
|
||||
check_summary = ''
|
||||
passing_tests = ''
|
||||
|
||||
Reference in New Issue
Block a user