Spaces:

m-ric
/

chunk_visualizer

Running

App Files Files Community

m-ric HF staff committed on Feb 19, 2024

Commit

31e3c37

verified ·

1 Parent(s): 83f2644

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -8

app.py CHANGED Viewed

@@ -17,9 +17,8 @@ def length_tokens(txt):
 def extract_separators_from_string(separators_str):
-    print('Received:', type(separators_str), 'with value', repr(separators_str))
     try:
-        separators_str = separators_str.replace("\\n", "\n").replace("\\t", "\t") # fix special characters
         separators = separators_str[1:-1].split(", ")
         return [separator.replace('"', "").replace("'", "") for separator in separators]
     except Exception as e:
@@ -47,7 +46,6 @@ def chunk(text, length, splitter_selection, separators_str, length_unit_selectio
             separator=" ",
         )
     elif splitter_selection == LABEL_RECURSIVE:
-        print('Splitting with separators:', ',,'.join([repr(el) for el in separators]), f',and chunk length {length} and chunk overlap {chunk_overlap}')
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=length,
             chunk_overlap=int(chunk_overlap),
@@ -55,14 +53,9 @@ def chunk(text, length, splitter_selection, separators_str, length_unit_selectio
             strip_whitespace=False,
             separators=separators,
         )
-        print(text_splitter._separators)
     splits = text_splitter.create_documents([text])
     text_splits = [split.page_content for split in splits]
-    print('I did splits:')
-    print(text_splits)
     unoverlapped_text_splits = unoverlap_list(text_splits)
     output = [((split[0], 'Overlap') if split[1] else (split[0], f"Chunk {str(i)}")) for i, split in enumerate(unoverlapped_text_splits)]
     return output

 def extract_separators_from_string(separators_str):
     try:
+        separators_str = separators_str.replace("\\n", "\n").replace("\\t", "\t").replace("\\\\", "\\") # fix special characters
         separators = separators_str[1:-1].split(", ")
         return [separator.replace('"', "").replace("'", "") for separator in separators]
     except Exception as e:
             separator=" ",
         )
     elif splitter_selection == LABEL_RECURSIVE:
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=length,
             chunk_overlap=int(chunk_overlap),
             strip_whitespace=False,
             separators=separators,
         )
     splits = text_splitter.create_documents([text])
     text_splits = [split.page_content for split in splits]
     unoverlapped_text_splits = unoverlap_list(text_splits)
     output = [((split[0], 'Overlap') if split[1] else (split[0], f"Chunk {str(i)}")) for i, split in enumerate(unoverlapped_text_splits)]
     return output