Add books

2026-04-04 22:56:29 -04:00
parent 3ccc529045
commit 535e07a61c
3 changed files with 10303 additions and 23 deletions
--- a/Books/Religeon/52
+++ b/Books/Religeon/52
--- a/Books/Religeon/Know
+++ b/Books/Religeon/Know
--- a/Chartwell.py
+++ b/Chartwell.py
@@ -80,13 +80,10 @@ import torch
 # Semantic chunking
 # Better table handling
 # -------------------------
 # Knowledge base selection
 # -------------------------
-BOOK_DIR = 'Books/History'  # just a string
+BOOK_DIR = 'Books/Religeon'  # just a string
 book_files = []
 for f in Path(BOOK_DIR).rglob('*'):
@@ -377,57 +374,71 @@ def topics_are_related(question, history, lookback=3):
    if not history:
        return False
    # Very short questions with pronouns are almost certainly follow-ups
    q_lower = question.lower()
 # Very short questions with pronouns are almost certainly follow-ups
    pronoun_followups = {
        "he","she","they","him","her","them","his","it",
        "this","that","these","those","who","what","where","when"
    }
    q_words_all = set(q_lower.replace('?','').replace('.','').split())
    if len(q_words_all) <= 5 and q_words_all & pronoun_followups:
        print(f"  [Pronoun follow-up detected — enriching]")
        return True    
    # Get meaningful words from current question
-    q_words = set(q_lower.split()) - STOPWORDS
+    q_words = set(q_lower.replace('?','').replace('.','').split()) - STOPWORDS
    if not q_words:
        return False
    # Get words from recent history questions
    recent = history[-lookback:]
    history_words = set()
    for exchange in recent:
-        history_words.update(exchange["question"].lower().split())
+        history_words.update(
            exchange["question"].lower().replace('?','').replace('.','').split()
        )
    history_words -= STOPWORDS
    # Pronoun follow-up check — only if history has meaningful content
    pronoun_followups = {
        "he","she","they","him","her","them","his","it",
        "this","that","these","those"
    }
    q_words_all = set(q_lower.replace('?','').replace('.','').split())
-    # Check overlap
+    if len(q_words_all) <= 5 and q_words_all & pronoun_followups:
        if history_words:
            print(f"  [Pronoun follow-up detected — enriching]")
            return True
    if not q_words:
        return False
    # Check meaningful word overlap
    overlap = len(q_words & history_words)
    print(f"  [Topic overlap: {overlap} word(s)]")
    return overlap > 0
 def enrich_query_with_history(question):
    """
    Add context from recent history to improve retrieval
    for short follow-up questions.

    The question is returned unchanged when conversation_history is
    empty, when the question has 8 or more whitespace-separated words,
    when topics_are_related() reports no relation to the history, or
    when the combined text would exceed 30 words.

    Otherwise returns the questions of the last 3 exchanges joined by
    single spaces, followed by the question itself.
    """
    # conversation_history is not defined in this function; it is read
    # from an enclosing scope (definition not visible here).
    # Nothing to draw context from when it is empty.
    if not conversation_history:
        return question
-    if len(question.split()) >= 6:
+    
    # Only enrich questions under 8 words
    if len(question.split()) >= 8:
        return question
    # Check if topic has shifted (topics_are_related is called with its
    # default lookback)
    if not topics_are_related(question, conversation_history):
        print(f"  [Topic shift detected — no enrichment]")
        return question
    # Look back up to 3 exchanges for context; only the "question" field
    # of each exchange is used, in their existing order
    recent = conversation_history[-3:]
    context_words = " ".join([ex["question"] for ex in recent])
    # History questions come first, the current question last
    enriched = f"{context_words} {question}"
-    # Don't enrich if result is too long — it will overwhelm the question
+    # Don't enrich if result is too long
    if len(enriched.split()) > 30:
        print(f"  [Enriched query too long — using original]")
        return question
    print(f"  [Enriched query: {enriched}]")
    return enriched
 # -------------------------
 # Retrieve top relevant chunks
 # -------------------------