Commit latest
This commit is contained in:
50
Chartwell.py
50
Chartwell.py
@@ -6,11 +6,17 @@ import re
|
||||
import numpy as np
|
||||
import json
|
||||
from pathlib import Path
|
||||
import torch
|
||||
|
||||
# Retrieval — find the most relevant chunks from your documents using embeddings and cosine similarity
|
||||
# Augmented — add that retrieved context to the prompt
|
||||
# Generation — use the language model to generate an answer based on that context
|
||||
|
||||
# ---------------
|
||||
# Running
|
||||
# --------------
|
||||
# python Chartwell.py
|
||||
|
||||
# --------------------------
|
||||
# GIT Configuration
|
||||
# ---------------------------
|
||||
@@ -20,6 +26,10 @@ from pathlib import Path
|
||||
# git config --global user.email "skessler1964@gmail.com"
|
||||
|
||||
|
||||
# Chartwell.py now has both models on GPU:
|
||||
#
|
||||
# GPT4All (Llama 3) — GPU for inference
|
||||
# SentenceTransformer — GPU for embeddings
|
||||
|
||||
# IMPORTANT SETUP STEPS FOR RE-CREATING THIS ENVIRONMENT
|
||||
# 1) Install python
|
||||
@@ -46,6 +56,12 @@ from pathlib import Path
|
||||
# pip install -r requirements.txt
|
||||
|
||||
|
||||
# Torch GPU version
|
||||
# pip uninstall torch -y
|
||||
# pip install torch --index-url https://download.pytorch.org/whl/cu124 --force-reinstall
|
||||
# python -c "import torch; print(torch.__version__); print(torch.cuda.is_available())"
|
||||
# Expected output: 2.x.x+cu124 and True when CUDA is available
|
||||
|
||||
# Still on the to-do list:
|
||||
# Fix the enrichment length cap
|
||||
# Semantic chunking
|
||||
@@ -114,7 +130,9 @@ LEVELS = {
|
||||
# Load the sentence transformer model
|
||||
# -----------------------------------
|
||||
print("Loading embedding model...")
|
||||
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
||||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
print(f"Embedding model using: {device}")
|
||||
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2",device=device)
|
||||
|
||||
# -----------------------------------
|
||||
# Load the language model - if it does not exist in the download area, download it; otherwise use it.
|
||||
@@ -145,9 +163,11 @@ def clean_text(text):
|
||||
text = re.sub(r'(\w+)-\n(\w+)', r'\1\2', text)
|
||||
text = re.sub(r'\n+', ' ', text)
|
||||
text = re.sub(r'(?<=[a-z])(\d{1,3})(?=\s[A-Z])', '', text)
|
||||
text = re.sub(r'\s\d{1,4}\s', ' ', text)
|
||||
# text = re.sub(r'\s\d{1,4}\s', ' ', text)
|
||||
text = re.sub(r'[■•◆▪→]', '', text)
|
||||
text = re.sub(r' +', ' ', text)
|
||||
text = re.sub(r'\[\d+\]', '', text)
|
||||
text = re.sub(r'\[citation needed\]', '', text)
|
||||
return text.strip()
|
||||
|
||||
# -------------------------
|
||||
@@ -486,24 +506,18 @@ def ask_question(question, show_sources=False, filter_term=None):
|
||||
f"Only say 'I don't know' if the context contains absolutely nothing relevant. "
|
||||
f"Do not reference outside sources. "
|
||||
f"Do not repeat or echo the conversation history in your answer. "
|
||||
f"Do not include 'Context:' or 'Q:' or 'A:' labels in your answer.\n\n"
|
||||
f"Do not include separator lines or notes about your sources in your answer. "
|
||||
)
|
||||
|
||||
f"Do not include labels, separator lines, or notes in your answer. "
|
||||
f"Stop immediately after answering.\n\n"
|
||||
)
|
||||
|
||||
if history_text:
|
||||
prompt += (
|
||||
f"--- BACKGROUND ONLY - DO NOT REPEAT ---\n"
|
||||
f"{history_text}"
|
||||
f"--- END BACKGROUND ---\n\n"
|
||||
)
|
||||
|
||||
prompt += f"HISTORY:\n{history_text}\n"
|
||||
|
||||
prompt += (
|
||||
f"--- REFERENCE CONTEXT ---\n"
|
||||
f"{context}\n"
|
||||
f"--- END CONTEXT ---\n\n"
|
||||
f"Question: {question}\n\n"
|
||||
f"Answer:"
|
||||
)
|
||||
f"CONTEXT:\n{context}\n\n"
|
||||
f"QUESTION: {question}\n\n"
|
||||
f"ANSWER:"
|
||||
)
|
||||
|
||||
with lm_model.chat_session():
|
||||
response = lm_model.generate(prompt, max_tokens=level_cfg["max_tokens"])
|
||||
|
||||
Reference in New Issue
Block a user