From 0f5bea98b116dfb0a2781902d2c07c96627ff4fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1t=C3=A9=20Farkas?=
Date: Sat, 20 Sep 2025 15:17:51 +0200
Subject: [PATCH] feat: update embeddings, prompt, and add streaming API

---
 src/llm/main.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/llm/main.py b/src/llm/main.py
index 516348c..88f0b84 100644
--- a/src/llm/main.py
+++ b/src/llm/main.py
@@ -9,21 +9,25 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
 
 # --- Configuration (Same as before) ---
 DB_PATH = "dune_db"
-EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
+EMBEDDING_MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5"
 OLLAMA_API_URL = "http://localhost:11434/api/generate"
 OLLAMA_MODEL = "llama3:8b"
 
 PROMPT_TEMPLATE = """
-You are a helpful AI assistant and an expert on the Dune book series.
-Use the following pieces of context from the books to answer the user's question.
+You are an expert lore master for the Dune universe.
+Your task is to answer the user's question with as much detail and context as possible, based *only* on the provided text excerpts.
 If you don't know the answer from the context provided, just say that you don't know, don't try to make up an answer.
-Context:
+Combine all the relevant information from the context below into a single, cohesive, and comprehensive answer.
+Do not break the answer into sections based on the source texts. Synthesize them.
+The answer should be thorough and well-explained.
+
+CONTEXT:
 {context}
 
-Question:
+QUESTION:
 {question}
 
-Answer:
+ANSWER:
 """
 
 # --- Pydantic Models (Same as before) ---
@@ -34,7 +38,7 @@ class AskRequest(BaseModel):
 app = FastAPI()
 embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME, model_kwargs={'trust_remote_code': True})
 vector_store = Chroma(persist_directory=DB_PATH, embedding_function=embeddings)
-retriever = vector_store.as_retriever(search_kwargs={"k": 5})
+retriever = vector_store.as_retriever(search_kwargs={"k": 8})
 
 # --- NEW: The Streaming Endpoint ---
 @app.post("/ask-stream")
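
Reviewer sketch: the second hunk's context shows the new `/ask-stream` route decorator but not the handler body, which falls outside the diff. Purely as a hedged illustration of how such an endpoint could be wired up, a minimal version follows; it assumes `AskRequest` exposes a `question: str` field and that the endpoint proxies Ollama's newline-delimited JSON streaming output. Neither assumption is confirmed by the patch.

```python
# Hypothetical handler body for /ask-stream (not shown in the patch).
# Assumes AskRequest has a `question: str` field and that Ollama's
# /api/generate endpoint streams newline-delimited JSON objects, each
# carrying a partial completion in its "response" field.
import json

import requests
from fastapi.responses import StreamingResponse


@app.post("/ask-stream")
async def ask_stream(request: AskRequest):
    # Fetch the top-k chunks and fill the prompt template.
    docs = retriever.invoke(request.question)
    context = "\n\n".join(doc.page_content for doc in docs)
    prompt = PROMPT_TEMPLATE.format(context=context, question=request.question)

    def token_generator():
        # stream=True appears twice on purpose: in the JSON body it makes
        # Ollama emit tokens incrementally; on the HTTP call it lets
        # requests iterate over the response as chunks arrive.
        with requests.post(
            OLLAMA_API_URL,
            json={"model": OLLAMA_MODEL, "prompt": prompt, "stream": True},
            stream=True,
        ) as resp:
            for line in resp.iter_lines():
                if line:
                    yield json.loads(line).get("response", "")

    return StreamingResponse(token_generator(), media_type="text/plain")
```

Two side effects of the diff worth flagging: `nomic-ai/nomic-embed-text-v1.5` ships custom modeling code, which is why the pre-existing `trust_remote_code=True` kwarg matters here, and swapping embedding models changes the vector space (384 dimensions for all-MiniLM-L6-v2 vs. 768 for nomic-embed-text-v1.5), so the `dune_db` Chroma index must be re-ingested with the new model before retrieval will work.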