Add books
This commit is contained in:
5284
Books/Religeon/52 Weeks Through the Bible - James Merritt.txt
Normal file
5284
Books/Religeon/52 Weeks Through the Bible - James Merritt.txt
Normal file
File diff suppressed because it is too large
Load Diff
4985
Books/Religeon/Know Your Bible_ All 66 Books E - Paul Kent.txt
Normal file
4985
Books/Religeon/Know Your Bible_ All 66 Books E - Paul Kent.txt
Normal file
File diff suppressed because it is too large
Load Diff
57
Chartwell.py
57
Chartwell.py
@@ -80,13 +80,10 @@ import torch
|
|||||||
# Semantic chunking
|
# Semantic chunking
|
||||||
# Better table handling
|
# Better table handling
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# -------------------------
|
# -------------------------
|
||||||
# Knowledge base selection
|
# Knowledge base selection
|
||||||
# -------------------------
|
# -------------------------
|
||||||
BOOK_DIR = 'Books/History' # just a string
|
BOOK_DIR = 'Books/Religeon' # just a string
|
||||||
book_files = []
|
book_files = []
|
||||||
|
|
||||||
for f in Path(BOOK_DIR).rglob('*'):
|
for f in Path(BOOK_DIR).rglob('*'):
|
||||||
@@ -377,57 +374,71 @@ def topics_are_related(question, history, lookback=3):
|
|||||||
if not history:
|
if not history:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Very short questions with pronouns are almost certainly follow-ups
|
|
||||||
q_lower = question.lower()
|
q_lower = question.lower()
|
||||||
# Very short questions with pronouns are almost certainly follow-ups
|
|
||||||
pronoun_followups = {
|
|
||||||
"he","she","they","him","her","them","his","it",
|
|
||||||
"this","that","these","those","who","what","where","when"
|
|
||||||
}
|
|
||||||
q_words_all = set(q_lower.replace('?','').replace('.','').split())
|
|
||||||
if len(q_words_all) <= 5 and q_words_all & pronoun_followups:
|
|
||||||
print(f" [Pronoun follow-up detected — enriching]")
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Get meaningful words from current question
|
# Get meaningful words from current question
|
||||||
q_words = set(q_lower.split()) - STOPWORDS
|
q_words = set(q_lower.replace('?','').replace('.','').split()) - STOPWORDS
|
||||||
|
|
||||||
if not q_words:
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Get words from recent history questions
|
# Get words from recent history questions
|
||||||
recent = history[-lookback:]
|
recent = history[-lookback:]
|
||||||
history_words = set()
|
history_words = set()
|
||||||
for exchange in recent:
|
for exchange in recent:
|
||||||
history_words.update(exchange["question"].lower().split())
|
history_words.update(
|
||||||
|
exchange["question"].lower().replace('?','').replace('.','').split()
|
||||||
|
)
|
||||||
history_words -= STOPWORDS
|
history_words -= STOPWORDS
|
||||||
|
|
||||||
# Check overlap
|
# Pronoun follow-up check — only if history has meaningful content
|
||||||
|
pronoun_followups = {
|
||||||
|
"he","she","they","him","her","them","his","it",
|
||||||
|
"this","that","these","those"
|
||||||
|
}
|
||||||
|
q_words_all = set(q_lower.replace('?','').replace('.','').split())
|
||||||
|
|
||||||
|
if len(q_words_all) <= 5 and q_words_all & pronoun_followups:
|
||||||
|
if history_words:
|
||||||
|
print(f" [Pronoun follow-up detected — enriching]")
|
||||||
|
return True
|
||||||
|
|
||||||
|
if not q_words:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Check meaningful word overlap
|
||||||
overlap = len(q_words & history_words)
|
overlap = len(q_words & history_words)
|
||||||
print(f" [Topic overlap: {overlap} word(s)]")
|
print(f" [Topic overlap: {overlap} word(s)]")
|
||||||
return overlap > 0
|
return overlap > 0
|
||||||
|
|
||||||
|
|
||||||
def enrich_query_with_history(question):
|
def enrich_query_with_history(question):
|
||||||
|
"""
|
||||||
|
Add context from recent history to improve retrieval
|
||||||
|
for short follow-up questions.
|
||||||
|
Skips enrichment if topic has shifted or enriched query is too long.
|
||||||
|
"""
|
||||||
if not conversation_history:
|
if not conversation_history:
|
||||||
return question
|
return question
|
||||||
if len(question.split()) >= 6:
|
|
||||||
|
# Only enrich questions under 8 words
|
||||||
|
if len(question.split()) >= 8:
|
||||||
return question
|
return question
|
||||||
|
|
||||||
|
# Check if topic has shifted
|
||||||
if not topics_are_related(question, conversation_history):
|
if not topics_are_related(question, conversation_history):
|
||||||
print(f" [Topic shift detected — no enrichment]")
|
print(f" [Topic shift detected — no enrichment]")
|
||||||
return question
|
return question
|
||||||
|
|
||||||
|
# Look back up to 3 exchanges for context
|
||||||
recent = conversation_history[-3:]
|
recent = conversation_history[-3:]
|
||||||
context_words = " ".join([ex["question"] for ex in recent])
|
context_words = " ".join([ex["question"] for ex in recent])
|
||||||
enriched = f"{context_words} {question}"
|
enriched = f"{context_words} {question}"
|
||||||
|
|
||||||
# Don't enrich if result is too long — it will overwhelm the question
|
# Don't enrich if result is too long
|
||||||
if len(enriched.split()) > 30:
|
if len(enriched.split()) > 30:
|
||||||
print(f" [Enriched query too long — using original]")
|
print(f" [Enriched query too long — using original]")
|
||||||
return question
|
return question
|
||||||
|
|
||||||
print(f" [Enriched query: {enriched}]")
|
print(f" [Enriched query: {enriched}]")
|
||||||
return enriched
|
return enriched
|
||||||
|
|
||||||
# -------------------------
|
# -------------------------
|
||||||
# Retrieve top relevant chunks
|
# Retrieve top relevant chunks
|
||||||
# -------------------------
|
# -------------------------
|
||||||
|
|||||||
Reference in New Issue
Block a user