Commit latest

2026-04-03 19:04:37 -04:00
parent 3110db8165
commit 5e5d8cbcb6
3 changed files with 33 additions and 159 deletions


@@ -6,11 +6,17 @@ import re
import numpy as np
import json
from pathlib import Path
import torch
# Retrieval — find the most relevant chunks from your documents using embeddings and cosine similarity
# Augmented — add that retrieved context to the prompt
# Generation — use the language model to generate an answer based on that context
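# A minimal sketch of the retrieval step (illustrative only; chunk_vecs and
# chunks here stand in for the precomputed document embeddings and their text):
# q_vec = embed_model.encode(question)
# sims = chunk_vecs @ q_vec / (np.linalg.norm(chunk_vecs, axis=1) * np.linalg.norm(q_vec))
# best = np.argsort(sims)[::-1][:5]  # top-5 chunks by cosine similarity
# context = "\n".join(chunks[i] for i in best)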
# ---------------
# Running
# --------------
# python Chartwell.py
# --------------------------
# GIT Configuration
# ---------------------------
@@ -20,6 +26,10 @@ from pathlib import Path
# git config --global user.email "skessler1964@gmail.com"
# Chartwell.py now has both models on GPU:
#
# GPT4All (Llama 3) — GPU for inference
# SentenceTransformer — GPU for embeddings
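# Illustrative sketch of the GPU load (the real calls live further down in this
# file; the model filename here is just an example from the GPT4All catalog):
# from gpt4all import GPT4All
# lm_model = GPT4All("Meta-Llama-3-8B-Instruct.Q4_0.gguf", device="gpu")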
# IMPORTANT SETUP STEPS FOR RE-CREATING THIS ENVIRONMENT
# 1) Install Python
@@ -46,6 +56,12 @@ from pathlib import Path
# pip install -r requirements.txt
# Torch GPU version
# pip uninstall torch -y
# pip install torch --index-url https://download.pytorch.org/whl/cu124 --force-reinstall
# python -c "import torch; print(torch.__version__); print(torch.cuda.is_available())"
# expected output: 2.x.x+cu124 and True when CUDA is available
# Still on the to-do list:
# Fix the enrichment length cap
# Semantic chunking
@@ -114,7 +130,9 @@ LEVELS = {
# Load the sentence transformer model
# -----------------------------------
print("Loading embedding model...")
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Embedding model using: {device}")
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", device=device)
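# Quick sanity check (illustrative): all-MiniLM-L6-v2 returns 384-dim vectors,
# so a single sentence should encode to shape (384,):
# print(embed_model.encode("sanity check").shape)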
# -----------------------------------
# Load the language model - if it does not exist in the download area, download it; otherwise use it.
@@ -145,9 +163,11 @@ def clean_text(text):
text = re.sub(r'(\w+)-\n(\w+)', r'\1\2', text)  # rejoin words hyphenated across line breaks
text = re.sub(r'\n+', ' ', text)  # collapse newlines into spaces
text = re.sub(r'(?<=[a-z])(\d{1,3})(?=\s[A-Z])', '', text)  # drop 1-3 digit numbers glued to the end of a word (inline footnotes)
text = re.sub(r'\s\d{1,4}\s', ' ', text)
# text = re.sub(r'\s\d{1,4}\s', ' ', text)
text = re.sub(r'[■•◆▪→]', '', text)  # strip bullet and arrow glyphs
text = re.sub(r' +', ' ', text)  # collapse repeated spaces
text = re.sub(r'\[\d+\]', '', text)  # drop citation markers like [12]
text = re.sub(r'\[citation needed\]', '', text)  # drop Wikipedia-style placeholders
return text.strip()
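# Example (illustrative) of what clean_text does to PDF-style noise:
# clean_text("hy-\nphenated page3 Next • [12][citation needed]")
# -> "hyphenated page Next"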
# -------------------------
@@ -486,24 +506,18 @@ def ask_question(question, show_sources=False, filter_term=None):
f"Only say 'I don't know' if the context contains absolutely nothing relevant. "
f"Do not reference outside sources. "
f"Do not repeat or echo the conversation history in your answer. "
f"Do not include 'Context:' or 'Q:' or 'A:' labels in your answer.\n\n"
f"Do not include separator lines or notes about your sources in your answer. "
)
f"Do not include labels, separator lines, or notes in your answer. "
f"Stop immediately after answering.\n\n"
)
if history_text:
prompt += (
f"--- BACKGROUND ONLY - DO NOT REPEAT ---\n"
f"{history_text}"
f"--- END BACKGROUND ---\n\n"
)
prompt += f"HISTORY:\n{history_text}\n"
prompt += (
f"--- REFERENCE CONTEXT ---\n"
f"{context}\n"
f"--- END CONTEXT ---\n\n"
f"Question: {question}\n\n"
f"Answer:"
)
f"CONTEXT:\n{context}\n\n"
f"QUESTION: {question}\n\n"
f"ANSWER:"
)
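# With the labels above, the assembled prompt has this rough shape:
#   <instructions>
#   HISTORY:            (only when there is prior conversation)
#   <history_text>
#   CONTEXT:
#   <retrieved chunks>
#   QUESTION: <question>
#   ANSWER: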
with lm_model.chat_session():
response = lm_model.generate(prompt, max_tokens=level_cfg["max_tokens"])