Spaces:
Running
Running
File size: 594 Bytes
1286e81 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple

import numpy as np
@dataclass
class DocumentChunk:
    """A span of document text together with its location metadata.

    Attributes:
        content: Text of the chunk.
        page_number: Page the chunk is associated with. The indexing base
            (0 or 1) is not established here -- confirm against the producer.
        chunk_id: Identifier string for the chunk.
        start_char: Start offset of the chunk; presumably a character index
            into the source text -- confirm against the chunker.
        end_char: End offset of the chunk; whether it is inclusive or
            exclusive is not established here -- confirm against the chunker.
    """

    content: str
    page_number: int
    chunk_id: str
    start_char: int
    end_char: int
@dataclass
class RetrievalConfig:
    """Tunable settings for chunking and retrieval.

    All fields have defaults, so ``RetrievalConfig()`` is a valid
    configuration. No validation is performed on the values.

    Attributes:
        num_chunks: Defaults to 5; presumably the number of chunks to
            return per query -- confirm against the retriever.
        embedding_weight: Defaults to 0.5; presumably the weight given to
            the embedding-based score when combining scores -- confirm.
        bm25_weight: Defaults to 0.5; presumably the weight given to the
            BM25 score when combining scores -- confirm.
        context_window: Defaults to 3; the unit (chunks, sentences, ...)
            is not established here -- confirm against the consumer.
        chunk_overlap: Defaults to 200; presumably the overlap between
            consecutive chunks, unit unconfirmed (characters vs. tokens).
        chunk_size: Defaults to 1000; presumably the target chunk length,
            unit unconfirmed (characters vs. tokens).
    """

    num_chunks: int = 5
    embedding_weight: float = 0.5
    bm25_weight: float = 0.5
    context_window: int = 3
    chunk_overlap: int = 200
    chunk_size: int = 1000
@dataclass
class ContextualizedChunk(DocumentChunk):
    """A ``DocumentChunk`` extended with context text and retrieval data.

    Inherits every field of ``DocumentChunk`` and adds three optional
    fields, all of which have defaults.

    Attributes:
        context: Additional text attached to the chunk; defaults to the
            empty string.
        embedding: Optional NumPy array, ``None`` by default; presumably
            the chunk's embedding vector -- confirm against the embedder.
            Not considered when comparing instances.
        bm25_score: Optional float, ``None`` by default; presumably the
            chunk's BM25 score for the current query -- confirm.
    """

    context: str = ""
    # Excluded from the generated __eq__: `==` on NumPy arrays returns an
    # array, whose truth value is ambiguous, so comparing two instances that
    # carry multi-element embeddings would raise ValueError.
    embedding: Optional[np.ndarray] = field(default=None, compare=False)
    bm25_score: Optional[float] = None
|