File size: 3,064 Bytes
488f910
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from typing import List, Dict, Optional
from tqdm import tqdm

def load_and_setup_db(
    persist_directory: str,
    embeddings
) -> Chroma:
    """
    Open a Chroma vector store backed by an on-disk directory.

    Args:
        persist_directory: Directory where the database is stored
        embeddings: Embedding function object handed to Chroma; presumably it
            must be the same model that was used when the database was built
            (not checked here)

    Returns:
        Chroma: Vector store bound to ``persist_directory``
    """
    # No validation is done here: the directory and embedding object are
    # passed straight through to the Chroma constructor.
    return Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings
    )

def _format_case_hit(doc, score: float) -> Dict:
    """Convert one ``(document, score)`` hit into the result dictionary shape."""
    # The raw score is treated as a distance, so it is flipped into a
    # similarity (assumes distances are roughly in the 0..1 range).
    return {
        'content': doc.page_content,
        'metadata': doc.metadata,
        'similarity_score': round(1 - score, 4)
    }


def search_cases(
    vectorstore: Chroma,
    query: str,
    k: int = 5,
    metadata_filter: Optional[Dict] = None,
    score_threshold: Optional[float] = 0.0
) -> List[Dict]:
    """
    Search the database for relevant cases.

    Args:
        vectorstore: Loaded Chroma vector store
        query: Search query text
        k: Maximum number of hits requested from the vector store
        metadata_filter: Optional filter for metadata fields
        score_threshold: Minimum similarity (``1 - score``) a hit must reach.
            ``None`` or ``0`` disables filtering entirely.

    Returns:
        List of dictionaries with the keys ``content``, ``metadata`` and
        ``similarity_score`` (rounded to 4 decimals). If the threshold rejects
        every hit, the first hit returned by the vector store is still
        included, so the list is empty only when the search found nothing.
    """
    # Perform similarity search with metadata filtering
    docs_and_scores = vectorstore.similarity_search_with_score(
        query,
        k=k,
        filter=metadata_filter
    )

    results = []
    for doc, score in docs_and_scores:
        # Compare the unrounded similarity so rounding never changes
        # which hits pass the threshold.
        if score_threshold and (1 - score) < score_threshold:
            continue
        results.append(_format_case_hit(doc, score))

    # Fallback: never hide every hit behind the threshold. The first hit is
    # formatted like all the others so callers can rely on one dictionary
    # shape (previously the raw (doc, score) tuple was appended here).
    if not results and docs_and_scores:
        first_doc, first_score = docs_and_scores[0]
        results.append(_format_case_hit(first_doc, first_score))

    return results

# Example usage function
def search_and_display_results(
    vectorstore: Chroma,
    query: str,
    k: int = 5,
    metadata_filter: Optional[Dict] = None,
    score_threshold: float = 0.7
) -> None:
    """
    Run a case search and print the matches to stdout.

    Args:
        vectorstore: Loaded Chroma vector store
        query: Search query text
        k: Number of results to request
        metadata_filter: Optional filter for metadata fields
        score_threshold: Minimum similarity score passed on to ``search_cases``
    """
    divider = "-" * 50

    print(f"\nSearching for: {query}")
    print(divider)

    matches = search_cases(
        vectorstore=vectorstore,
        query=query,
        k=k,
        metadata_filter=metadata_filter,
        score_threshold=score_threshold
    )

    if matches:
        print(f"Found {len(matches)} relevant matches:\n")

        for position, match in enumerate(matches, start=1):
            print(f"Match {position}:")
            print(f"Similarity Score: {match['similarity_score']}")
            print(f"Metadata: {match['metadata']}")
            # Only a 200-character preview of the content is shown
            print(f"Content: {match['content'][:200]}...")
            print(divider)
    else:
        print("No matching results found.")