File size: 594 Bytes
1286e81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple

import numpy as np


@dataclass
class DocumentChunk:
    """Record describing one chunk of a document.

    Bundles the chunk's text with its page number, an identifier and a
    pair of character positions. Fields are positional in the order
    declared below; none has a default, so all five must be supplied.
    """

    # Text carried by this chunk.
    content: str
    # Page the chunk is associated with (0- or 1-based is not established
    # in this file -- confirm with the code that builds chunks).
    page_number: int
    # Identifier for the chunk; uniqueness is not enforced here.
    chunk_id: str
    # Character span of the chunk. Presumably offsets into the source
    # text; whether end_char is inclusive is not visible here -- confirm.
    start_char: int
    end_char: int


@dataclass
class RetrievalConfig:
    """Settings bundle for retrieval, with a default for every field.

    All fields are optional at construction time; ``RetrievalConfig()``
    yields the defaults listed below. No validation is performed on the
    supplied values.
    """

    # How many chunks to return.
    num_chunks: int = 5
    # Weights for the embedding and BM25 signals. Presumably used to
    # blend the two scores; nothing here requires them to sum to 1.
    embedding_weight: float = 0.5
    bm25_weight: float = 0.5
    # Size of the context window; the unit (chunks, sentences, ...) is
    # not established in this file -- confirm with the consumer.
    context_window: int = 3
    # Chunking parameters. Presumably measured in characters, matching
    # the start_char/end_char naming used elsewhere -- TODO confirm.
    chunk_overlap: int = 200
    chunk_size: int = 1_000


@dataclass
class ContextualizedChunk(DocumentChunk):
    """A ``DocumentChunk`` extended with optional retrieval metadata.

    Adds three defaulted fields after the inherited ones: a context
    string, an optional embedding array and an optional BM25 score.

    Equality compares the inherited fields, ``context`` and
    ``bm25_score``. ``embedding`` is deliberately left out of the
    generated ``__eq__`` (see the comment on the field), so two chunks
    that differ only in their embedding compare equal.
    """

    # Extra text attached to the chunk; empty when none was provided.
    context: str = ""
    # Excluded from comparison: ``==`` between two ndarrays returns an
    # array, and taking its truth value raises ValueError when it has
    # more than one element. With the dataclass-generated __eq__, that
    # made comparing two chunks holding distinct embedding arrays crash
    # instead of returning a bool.
    embedding: Optional[np.ndarray] = field(default=None, compare=False)
    # BM25 score for the chunk; None until one has been assigned.
    bm25_score: Optional[float] = None