Add books
This commit is contained in:
5284
Books/Religeon/52 Weeks Through the Bible - James Merritt.txt
Normal file
5284
Books/Religeon/52 Weeks Through the Bible - James Merritt.txt
Normal file
File diff suppressed because it is too large
Load Diff
4985
Books/Religeon/Know Your Bible_ All 66 Books E - Paul Kent.txt
Normal file
4985
Books/Religeon/Know Your Bible_ All 66 Books E - Paul Kent.txt
Normal file
File diff suppressed because it is too large
Load Diff
57
Chartwell.py
57
Chartwell.py
@@ -80,13 +80,10 @@ import torch
|
||||
# Semantic chunking
|
||||
# Better table handling
|
||||
|
||||
|
||||
|
||||
|
||||
# -------------------------
|
||||
# Knowledge base selection
|
||||
# -------------------------
|
||||
BOOK_DIR = 'Books/History' # just a string
|
||||
BOOK_DIR = 'Books/Religeon' # just a string
|
||||
book_files = []
|
||||
|
||||
for f in Path(BOOK_DIR).rglob('*'):
|
||||
@@ -377,57 +374,71 @@ def topics_are_related(question, history, lookback=3):
|
||||
if not history:
|
||||
return False
|
||||
|
||||
# Very short questions with pronouns are almost certainly follow-ups
|
||||
q_lower = question.lower()
|
||||
# Very short questions with pronouns are almost certainly follow-ups
|
||||
pronoun_followups = {
|
||||
"he","she","they","him","her","them","his","it",
|
||||
"this","that","these","those","who","what","where","when"
|
||||
}
|
||||
q_words_all = set(q_lower.replace('?','').replace('.','').split())
|
||||
if len(q_words_all) <= 5 and q_words_all & pronoun_followups:
|
||||
print(f" [Pronoun follow-up detected — enriching]")
|
||||
return True
|
||||
|
||||
# Get meaningful words from current question
|
||||
q_words = set(q_lower.split()) - STOPWORDS
|
||||
|
||||
if not q_words:
|
||||
return False
|
||||
q_words = set(q_lower.replace('?','').replace('.','').split()) - STOPWORDS
|
||||
|
||||
# Get words from recent history questions
|
||||
recent = history[-lookback:]
|
||||
history_words = set()
|
||||
for exchange in recent:
|
||||
history_words.update(exchange["question"].lower().split())
|
||||
history_words.update(
|
||||
exchange["question"].lower().replace('?','').replace('.','').split()
|
||||
)
|
||||
history_words -= STOPWORDS
|
||||
|
||||
# Pronoun follow-up check — only if history has meaningful content
|
||||
pronoun_followups = {
|
||||
"he","she","they","him","her","them","his","it",
|
||||
"this","that","these","those"
|
||||
}
|
||||
q_words_all = set(q_lower.replace('?','').replace('.','').split())
|
||||
|
||||
# Check overlap
|
||||
if len(q_words_all) <= 5 and q_words_all & pronoun_followups:
|
||||
if history_words:
|
||||
print(f" [Pronoun follow-up detected — enriching]")
|
||||
return True
|
||||
|
||||
if not q_words:
|
||||
return False
|
||||
|
||||
# Check meaningful word overlap
|
||||
overlap = len(q_words & history_words)
|
||||
print(f" [Topic overlap: {overlap} word(s)]")
|
||||
return overlap > 0
|
||||
|
||||
|
||||
def enrich_query_with_history(question):
|
||||
"""
|
||||
Add context from recent history to improve retrieval
|
||||
for short follow-up questions.
|
||||
Skips enrichment if topic has shifted or enriched query is too long.
|
||||
"""
|
||||
if not conversation_history:
|
||||
return question
|
||||
if len(question.split()) >= 6:
|
||||
|
||||
# Only enrich questions under 8 words
|
||||
if len(question.split()) >= 8:
|
||||
return question
|
||||
|
||||
# Check if topic has shifted
|
||||
if not topics_are_related(question, conversation_history):
|
||||
print(f" [Topic shift detected — no enrichment]")
|
||||
return question
|
||||
|
||||
# Look back up to 3 exchanges for context
|
||||
recent = conversation_history[-3:]
|
||||
context_words = " ".join([ex["question"] for ex in recent])
|
||||
enriched = f"{context_words} {question}"
|
||||
|
||||
# Don't enrich if result is too long — it will overwhelm the question
|
||||
# Don't enrich if result is too long
|
||||
if len(enriched.split()) > 30:
|
||||
print(f" [Enriched query too long — using original]")
|
||||
return question
|
||||
|
||||
print(f" [Enriched query: {enriched}]")
|
||||
return enriched
|
||||
|
||||
# -------------------------
|
||||
# Retrieve top relevant chunks
|
||||
# -------------------------
|
||||
|
||||
Reference in New Issue
Block a user