From 3841d5e8edb486e84c8582e9bb770c18d8149782 Mon Sep 17 00:00:00 2001 From: Mike Date: Sun, 30 Jul 2023 13:05:40 -0400 Subject: [PATCH] generic script --- chunker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chunker.py b/chunker.py index 28001d3..70545db 100755 --- a/chunker.py +++ b/chunker.py @@ -3,7 +3,7 @@ from jinja2 import Template from pathlib import Path import json -def chunk(filename, chunk_size = 1024*4, chunk_overlap = 0): +def chunker(filename, chunk_size = 1024*4, chunk_overlap = 0): from langchain.text_splitter import RecursiveCharacterTextSplitter lines = open(filename).readlines() @@ -18,7 +18,7 @@ def chunk(filename, chunk_size = 1024*4, chunk_overlap = 0): def main(text: str, template: str, chunk_size: int = 1024*4, chunk_overlap: int = 0): the_template = Template(open(template).read()) - texts = chunk(text, chunk_size=chunk_size, chunk_overlap=chunk_overlap) + texts = chunker(text, chunk_size=chunk_size, chunk_overlap=chunk_overlap) prepare = [] for idx, chunk in enumerate(texts):