File size: 1,796 Bytes
1d01073
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import os
import json
from pathlib import Path

def _folder_sort_key(name):
    """Return a sort key placing numeric names (by value) before all others."""
    try:
        return (0, int(name), name)
    except ValueError:
        # Non-numeric names cannot be compared by value; keep them after the
        # numbered folders, ordered by name, instead of aborting the sort.
        return (1, 0, name)


def _read_text(path):
    """Return the entire contents of the UTF-8 text file at *path*."""
    with open(path, "r", encoding="utf-8") as f:
        return f.read()


def data_to_jsonl(input_dir, output_file):
    """Read the data folders inside input_dir and write them to a JSONL file.

    Each sub-folder of ``input_dir`` is expected to contain ``question.txt``,
    ``answer.txt``, ``code.py`` and ``metadata.json``. One JSON object per
    folder is written to ``output_file`` with the keys ``folder``,
    ``question``, ``answer``, ``code`` and ``metadata``.

    Folders whose names are integers are processed in numeric order (so "2"
    comes before "10"); folders with non-numeric names follow, ordered by
    name. Entries of ``input_dir`` that are not directories are ignored.

    Args:
        input_dir: Directory whose sub-folders hold the data files.
        output_file: Path of the JSONL file to create (overwritten if present).

    Raises:
        json.JSONDecodeError: If a ``metadata.json`` is not valid JSON. In
            that case nothing is written to ``output_file``.
    """
    # Filter to directories *before* sorting so that stray files with
    # non-numeric names never reach the numeric sort key.
    folder_names = sorted(
        (
            name
            for name in os.listdir(input_dir)
            if os.path.isdir(os.path.join(input_dir, name))
        ),
        key=_folder_sort_key,
    )

    data = []
    for folder_name in folder_names:
        folder_path = os.path.join(input_dir, folder_name)
        try:
            question = _read_text(os.path.join(folder_path, "question.txt"))
            answer = _read_text(os.path.join(folder_path, "answer.txt"))
            code = _read_text(os.path.join(folder_path, "code.py"))
            with open(os.path.join(folder_path, "metadata.json"), "r", encoding="utf-8") as f:
                metadata = json.load(f)
        except FileNotFoundError as e:
            # Incomplete folders are skipped (best effort), not fatal.
            print(f"Skipping {folder_name} due to missing file: {e}")
            continue

        data.append({
            "folder": folder_name,
            "question": question,
            "answer": answer,
            "code": code,
            "metadata": metadata,
        })

    # Written only after every folder was read, so a failure while reading
    # never leaves a partially written output file behind.
    with open(output_file, "w", encoding="utf-8") as f:
        for entry in data:
            f.write(json.dumps(entry, ensure_ascii=False) + "\n")

    print(f"Data successfully written to {output_file}")

if __name__ == "__main__":
    # Ask for the output base name; the ".jsonl" extension is appended below.
    base_name = input('Enter the name of file without .jsonl : ')

    # Questions are read from "data/questions" next to this script's folder,
    # i.e. <directory containing the script's directory>/data/questions.
    questions_dir = Path(__file__).parent.parent / "data/questions"
    data_to_jsonl(questions_dir, base_name + ".jsonl")