feat: update embeddings, prompt, and add streaming API

Commit 0f5bea98b1 (parent 6e95b59a3e), authored 2025-09-20 15:17:51 +02:00.

View File

@@ -9,21 +9,25 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
# --- Configuration (Same as before) ---

# Directory where the persisted Chroma vector store lives.
DB_PATH = "dune_db"

# NOTE(review): the diff view had left both the old model name
# ("all-MiniLM-L6-v2") and the new one in place; only the current
# (post-commit) embedding model is kept here.
EMBEDDING_MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5"

# Local Ollama generation endpoint and model used for answering.
OLLAMA_API_URL = "http://localhost:11434/api/generate"
OLLAMA_MODEL = "llama3:8b"

# Prompt sent to the Ollama model. The {context} and {question}
# placeholders are filled in per request. Superseded lines from the
# pre-commit prompt ("Context:", "Question:", "Answer:", and the old
# persona) have been removed so only the new template remains.
PROMPT_TEMPLATE = """
You are an expert lore master for the Dune universe.
Your task is to answer the user's question with as much detail and context as possible, based *only* on the provided text excerpts.
If you don't know the answer from the context provided, just say that you don't know, don't try to make up an answer.
Combine all the relevant information from the context below into a single, cohesive, and comprehensive answer.
Do not break the answer into sections based on the source texts. Synthesize them.
The answer should be thorough and well-explained.
CONTEXT:
{context}
QUESTION:
{question}
ANSWER:
"""
# --- Pydantic Models (Same as before) ---
@@ -34,7 +38,7 @@ class AskRequest(BaseModel):
app = FastAPI()

# trust_remote_code is required because the nomic embedding model ships
# custom modeling code on the Hub — TODO confirm this matches the
# deployed model revision.
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME, model_kwargs={'trust_remote_code': True})
vector_store = Chroma(persist_directory=DB_PATH, embedding_function=embeddings)

# NOTE(review): the diff view kept both the old (k=5) and new (k=8)
# retriever lines; only the post-commit value is retained. k=8 widens
# the retrieved context for the more detailed prompt.
retriever = vector_store.as_retriever(search_kwargs={"k": 8})
# --- NEW: The Streaming Endpoint ---
@app.post("/ask-stream")