# File size: 3,064 Bytes (revision 488f910)
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from typing import List, Dict, Optional
from tqdm import tqdm
def load_and_setup_db(
    persist_directory: str,
    embeddings
) -> Chroma:
    """
    Load the previously created ChromaDB from disk.

    Args:
        persist_directory: Directory where the database is stored
        embeddings: Embedding object passed to Chroma as its
            ``embedding_function``. It should be the same embedding model
            the database was built with; this function does not verify that.

    Returns:
        Chroma: Loaded vector store
    """
    # Open the existing database; nothing is added or re-indexed here.
    return Chroma(
        embedding_function=embeddings,
        persist_directory=persist_directory,
    )
def search_cases(
    vectorstore: Chroma,
    query: str,
    k: int = 5,
    metadata_filter: Optional[Dict] = None,
    score_threshold: Optional[float] = 0.0
) -> List[Dict]:
    """
    Search the database for relevant cases.

    Args:
        vectorstore: Loaded Chroma vector store
        query: Search query text
        k: Number of results to return
        metadata_filter: Optional filter for metadata fields
        score_threshold: Minimum similarity score a hit must reach to be
            kept. ``None`` or ``0`` disables the filtering entirely.

    Returns:
        List of dicts with the keys ``'content'``, ``'metadata'`` and
        ``'similarity_score'``. When the store returned hits but none of
        them reached the threshold, the first returned hit is included on
        its own (in the same dict format) so callers still get a best
        effort match. The list is empty only when the store returned
        nothing.
    """
    # Perform similarity search with metadata filtering
    docs_and_scores = vectorstore.similarity_search_with_score(
        query,
        k=k,
        filter=metadata_filter
    )

    results = []
    for doc, score in docs_and_scores:
        # Convert score to similarity (assuming distance score)
        similarity = 1 - score
        # A falsy threshold (None or 0) means "do not filter".
        if score_threshold and similarity < score_threshold:
            continue
        results.append(_format_case_result(doc, similarity))

    if not results and docs_and_scores:
        # Everything was filtered out: fall back to the first hit the store
        # returned (presumably the closest one — confirm the store's
        # ordering). It must be formatted like any other result; appending
        # the raw (doc, score) tuple would break callers that index the
        # result by key.
        first_doc, first_score = docs_and_scores[0]
        results.append(_format_case_result(first_doc, 1 - first_score))

    return results


def _format_case_result(doc, similarity) -> Dict:
    """Build the result dict for one document and its similarity value."""
    return {
        'content': doc.page_content,
        'metadata': doc.metadata,
        'similarity_score': round(similarity, 4)
    }
# Example usage function
def search_and_display_results(
    vectorstore: Chroma,
    query: str,
    k: int = 5,
    metadata_filter: Optional[Dict] = None,
    score_threshold: float = 0.7
) -> None:
    """
    Run a search via ``search_cases`` and print the matches to stdout.

    Args:
        vectorstore: Vector store forwarded to ``search_cases``
        query: Search query text
        k: Number of results to request
        metadata_filter: Optional filter for metadata fields
        score_threshold: Minimum similarity score forwarded to
            ``search_cases``

    Returns:
        None. Output is printed only.
    """
    divider = "-" * 50

    print(f"\nSearching for: {query}")
    print(divider)

    matches = search_cases(
        vectorstore=vectorstore,
        query=query,
        k=k,
        metadata_filter=metadata_filter,
        score_threshold=score_threshold,
    )

    if matches:
        print(f"Found {len(matches)} relevant matches:\n")
        for position, match in enumerate(matches, start=1):
            print(f"Match {position}:")
            print(f"Similarity Score: {match['similarity_score']}")
            print(f"Metadata: {match['metadata']}")
            # Only the first 200 characters of the content are shown.
            preview = match['content'][:200]
            print(f"Content: {preview}...")
            print(divider)
    else:
        print("No matching results found.")