feat: update embeddings, prompt, and add streaming API

2025-09-20 15:17:51 +02:00
parent 6e95b59a3e
commit 0f5bea98b1
1 changed file with 11 additions and 7 deletions
--- a/src/llm/main.py
+++ b/src/llm/main.py
@@ -9,21 +9,25 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
 # --- Configuration (Same as before) ---
 DB_PATH = "dune_db"
-EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
+EMBEDDING_MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5"
 OLLAMA_API_URL = "http://localhost:11434/api/generate"
 OLLAMA_MODEL = "llama3:8b"
 PROMPT_TEMPLATE = """
-You are a helpful AI assistant and an expert on the Dune book series.
+You are an expert lore master for the Dune universe.
-Use the following pieces of context from the books to answer the user's question.
+Your task is to answer the user's question with as much detail and context as possible, based *only* on the provided text excerpts.
 If you don't know the answer from the context provided, just say that you don't know, don't try to make up an answer.
-Context:
+Combine all the relevant information from the context below into a single, cohesive, and comprehensive answer.
+Do not break the answer into sections based on the source texts. Synthesize them.
+The answer should be thorough and well-explained.
+CONTEXT:
 {context}
-Question:
+QUESTION:
 {question}
-Answer:
+ANSWER:
 """
 # --- Pydantic Models (Same as before) ---
@@ -34,7 +38,7 @@ class AskRequest(BaseModel):
 app = FastAPI()
 embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME, model_kwargs={'trust_remote_code': True})
 vector_store = Chroma(persist_directory=DB_PATH, embedding_function=embeddings)
-retriever = vector_store.as_retriever(search_kwargs={"k": 5})
+retriever = vector_store.as_retriever(search_kwargs={"k": 8})
 # --- NEW: The Streaming Endpoint ---
@app.post("/ask-stream")