Add books

2026-04-04 22:56:29 -04:00
parent 3ccc529045
commit 535e07a61c
3 changed files with 10303 additions and 23 deletions
--- a/Books/Religeon/52
+++ b/Books/Religeon/52
--- a/Books/Religeon/Know
+++ b/Books/Religeon/Know
--- a/Chartwell.py
+++ b/Chartwell.py
@@ -80,13 +80,10 @@ import torch
 # Semantic chunking
 # Better table handling

-
-
-
 # -------------------------
 # Knowledge base selection
 # -------------------------
-BOOK_DIR = 'Books/History'  # just a string
+BOOK_DIR = 'Books/Religeon'  # just a string
 book_files = []

 for f in Path(BOOK_DIR).rglob('*'):
@@ -377,57 +374,71 @@ def topics_are_related(question, history, lookback=3):
    if not history:
        return False
    
-    # Very short questions with pronouns are almost certainly follow-ups
    q_lower = question.lower()
-# Very short questions with pronouns are almost certainly follow-ups
-    pronoun_followups = {
-        "he","she","they","him","her","them","his","it",
-        "this","that","these","those","who","what","where","when"
-    }
-    q_words_all = set(q_lower.replace('?','').replace('.','').split())
-    if len(q_words_all) <= 5 and q_words_all & pronoun_followups:
-        print(f"  [Pronoun follow-up detected — enriching]")
-        return True    
    
    # Get meaningful words from current question
-    q_words = set(q_lower.split()) - STOPWORDS
-    
-    if not q_words:
-        return False
+    q_words = set(q_lower.replace('?','').replace('.','').split()) - STOPWORDS
    
    # Get words from recent history questions
    recent = history[-lookback:]
    history_words = set()
    for exchange in recent:
-        history_words.update(exchange["question"].lower().split())
+        history_words.update(
+            exchange["question"].lower().replace('?','').replace('.','').split()
+        )
    history_words -= STOPWORDS
+
+    # Pronoun follow-up check — only if history has meaningful content
+    pronoun_followups = {
+        "he","she","they","him","her","them","his","it",
+        "this","that","these","those"
+    }
+    q_words_all = set(q_lower.replace('?','').replace('.','').split())
    
-    # Check overlap
+    if len(q_words_all) <= 5 and q_words_all & pronoun_followups:
+        if history_words:
+            print(f"  [Pronoun follow-up detected — enriching]")
+            return True
+    
+    if not q_words:
+        return False
+    
+    # Check meaningful word overlap
    overlap = len(q_words & history_words)
    print(f"  [Topic overlap: {overlap} word(s)]")
    return overlap > 0

+
 def enrich_query_with_history(question):
+    """
+    Prepend the questions from up to 3 recent exchanges to a short follow-up.
+    Returns the question unchanged when there is no history, it has 8+ words,
+    the topic has shifted, or the enriched query would exceed 30 words.
+    """
    if not conversation_history:
        return question
-    if len(question.split()) >= 6:
+    
+    # Only enrich questions under 8 words
+    if len(question.split()) >= 8:
        return question
+    
+    # Check if topic has shifted
    if not topics_are_related(question, conversation_history):
        print(f"  [Topic shift detected — no enrichment]")
        return question
    
+    # Look back up to 3 exchanges for context
    recent = conversation_history[-3:]
    context_words = " ".join([ex["question"] for ex in recent])
    enriched = f"{context_words} {question}"
    
-    # Don't enrich if result is too long — it will overwhelm the question
+    # Don't enrich if result is too long
    if len(enriched.split()) > 30:
        print(f"  [Enriched query too long — using original]")
        return question
        
    print(f"  [Enriched query: {enriched}]")
    return enriched
-
 # -------------------------
 # Retrieve top relevant chunks
 # -------------------------