Commit latest

2026-04-03 19:04:37 -04:00
parent 3110db8165
commit 5e5d8cbcb6
3 changed files with 33 additions and 159 deletions


@@ -6,11 +6,17 @@ import re
import numpy as np
import json
from pathlib import Path
import torch
# Retrieval — find the most relevant chunks from your documents using embeddings and cosine similarity
# Augmented — add that retrieved context to the prompt
# Generation — use the language model to generate an answer based on that context
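# A minimal sketch of the retrieval step (illustrative only; chunk_vecs and
# chunks here stand in for the precomputed document embeddings and their text):
# q_vec = embed_model.encode(question)
# sims = chunk_vecs @ q_vec / (np.linalg.norm(chunk_vecs, axis=1) * np.linalg.norm(q_vec))
# best = np.argsort(sims)[::-1][:5]  # top-5 chunks by cosine similarity
# context = "\n".join(chunks[i] for i in best)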
# ---------------
# Running
# --------------
# python Chartwell.py
# --------------------------
# GIT Configuration
# ---------------------------
@@ -20,6 +26,10 @@ from pathlib import Path
# git config --global user.email "skessler1964@gmail.com"
# Chartwell.py now has both models on GPU:
#
# GPT4All (Llama 3) — GPU for inference
# SentenceTransformer — GPU for embeddings
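# Illustrative sketch of the GPU load (the real calls live further down in this
# file; the model filename here is just an example from the GPT4All catalog):
# from gpt4all import GPT4All
# lm_model = GPT4All("Meta-Llama-3-8B-Instruct.Q4_0.gguf", device="gpu")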
# IMPORTANT SETUP STEPS FOR RE-CREATING THIS ENVIRONMENT
# 1) Install Python
@@ -46,6 +56,12 @@ from pathlib import Path
# pip install -r requirements.txt
# Torch GPU version
# pip uninstall torch -y
# pip install torch --index-url https://download.pytorch.org/whl/cu124 --force-reinstall
# python -c "import torch; print(torch.__version__); print(torch.cuda.is_available())"
# expected output: 2.x.x+cu124 and True when CUDA is available
# Still on the to-do list:
# Fix the enrichment length cap
# Semantic chunking
@@ -114,7 +130,9 @@ LEVELS = {
# Load the sentence transformer model
# -----------------------------------
print("Loading embedding model...")
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Embedding model using: {device}")
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", device=device)
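# Quick sanity check (illustrative): all-MiniLM-L6-v2 returns 384-dim vectors,
# so a single sentence should encode to shape (384,):
# print(embed_model.encode("sanity check").shape)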
# -----------------------------------
# Load the language model - if it does not exist in the download area, download it; otherwise use it.
@@ -145,9 +163,11 @@ def clean_text(text):
text = re.sub(r'(\w+)-\n(\w+)', r'\1\2', text)  # rejoin words hyphenated across line breaks
text = re.sub(r'\n+', ' ', text)  # collapse newlines into spaces
text = re.sub(r'(?<=[a-z])(\d{1,3})(?=\s[A-Z])', '', text)  # drop 1-3 digit numbers glued to the end of a word (inline footnotes)
text = re.sub(r'\s\d{1,4}\s', ' ', text)
# text = re.sub(r'\s\d{1,4}\s', ' ', text)
text = re.sub(r'[■•◆▪→]', '', text)  # strip bullet and arrow glyphs
text = re.sub(r' +', ' ', text)  # collapse repeated spaces
text = re.sub(r'\[\d+\]', '', text)  # drop citation markers like [12]
text = re.sub(r'\[citation needed\]', '', text)  # drop Wikipedia-style placeholders
return text.strip()
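# Example (illustrative) of what clean_text does to PDF-style noise:
# clean_text("hy-\nphenated page3 Next • [12][citation needed]")
# -> "hyphenated page Next"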
# -------------------------
@@ -486,24 +506,18 @@ def ask_question(question, show_sources=False, filter_term=None):
f"Only say 'I don't know' if the context contains absolutely nothing relevant. "
f"Do not reference outside sources. "
f"Do not repeat or echo the conversation history in your answer. "
f"Do not include 'Context:' or 'Q:' or 'A:' labels in your answer.\n\n"
f"Do not include separator lines or notes about your sources in your answer. "
)
f"Do not include labels, separator lines, or notes in your answer. "
f"Stop immediately after answering.\n\n"
)
if history_text:
prompt += (
f"--- BACKGROUND ONLY - DO NOT REPEAT ---\n"
f"{history_text}"
f"--- END BACKGROUND ---\n\n"
)
prompt += f"HISTORY:\n{history_text}\n"
prompt += (
f"--- REFERENCE CONTEXT ---\n"
f"{context}\n"
f"--- END CONTEXT ---\n\n"
f"Question: {question}\n\n"
f"Answer:"
)
f"CONTEXT:\n{context}\n\n"
f"QUESTION: {question}\n\n"
f"ANSWER:"
)
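# With the labels above, the assembled prompt has this rough shape:
#   <instructions>
#   HISTORY:            (only when there is prior conversation)
#   <history_text>
#   CONTEXT:
#   <retrieved chunks>
#   QUESTION: <question>
#   ANSWER: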
with lm_model.chat_session():
response = lm_model.generate(prompt, max_tokens=level_cfg["max_tokens"])