Reorganize the corpus. Prevent answers from being truncated mid-sentence.

2026-04-06 12:24:05 -04:00
parent 2e3dd2fff0
commit cb7d906ef7
9 changed files with 26 additions and 10 deletions
--- a/Books/Procedure/ANNETTE
+++ b/Books/Procedure/ANNETTE
--- a/Books/Procedure/Chapter
+++ b/Books/Procedure/Chapter
--- a/Books/Procedure/Chapter
+++ b/Books/Procedure/Chapter
--- a/Books/Procedure/Chapter
+++ b/Books/Procedure/Chapter
--- a/Books/Procedure/Chapter
+++ b/Books/Procedure/Chapter
--- a/Books/Procedure/Defend
+++ b/Books/Procedure/Defend
--- a/Books/Procedure/Florida
+++ b/Books/Procedure/Florida
--- a/Books/Procedure/Patrick
+++ b/Books/Procedure/Patrick
--- a/Chartwell.py
+++ b/Chartwell.py
@@ -119,14 +119,14 @@ conversation_history = []
 LEVELS = {
    1:  {"expand": False, "top_k": 1, "max_tokens": 75,  "context_len": 500},
    2:  {"expand": False, "top_k": 1, "max_tokens": 75,  "context_len": 600},
-    3:  {"expand": False, "top_k": 2, "max_tokens": 100, "context_len": 700},
+    3:  {"expand": False, "top_k": 2, "max_tokens": 100*3, "context_len": 700},
-    4:  {"expand": False, "top_k": 2, "max_tokens": 100, "context_len": 800},
+    4:  {"expand": False, "top_k": 2, "max_tokens": 100*3, "context_len": 800},
-    5:  {"expand": False, "top_k": 3, "max_tokens": 125, "context_len": 1000},
+    5:  {"expand": False, "top_k": 3, "max_tokens": 125*3, "context_len": 1000},
-    6:  {"expand": False, "top_k": 3, "max_tokens": 150, "context_len": 1200},
+    6:  {"expand": False, "top_k": 3, "max_tokens": 150*3, "context_len": 1200},
-    7:  {"expand": True,  "top_k": 3, "max_tokens": 150, "context_len": 1400},
+    7:  {"expand": True,  "top_k": 3, "max_tokens": 150*3, "context_len": 1400},
-    8:  {"expand": True,  "top_k": 4, "max_tokens": 175, "context_len": 1600},
+    8:  {"expand": True,  "top_k": 4, "max_tokens": 175*3, "context_len": 1600},
-    9:  {"expand": True,  "top_k": 5, "max_tokens": 175, "context_len": 1800},
+    9:  {"expand": True,  "top_k": 5, "max_tokens": 175*3, "context_len": 1800},
-    10: {"expand": True,  "top_k": 5, "max_tokens": 200, "context_len": 2000},
+    10: {"expand": True,  "top_k": 5, "max_tokens": 200*3, "context_len": 2000},
 }
 # -------------------------
@@ -569,6 +569,21 @@ def parse_input(user_input):
        return question, filter_term
    return user_input, SEARCH_FILTER
 # -------------------------
 # Truncate context at a sentence boundary so the LLM is not fed an incomplete trailing fragment
 # -------------------------
 def truncate_at_sentence(text, max_chars):
    """Cut *text* to at most *max_chars* characters, ending on a full sentence.

    Args:
        text: The string to shorten.
        max_chars: Maximum number of characters to keep.

    Returns:
        *text* unchanged when it already fits. Otherwise the longest prefix
        of the first *max_chars* characters that ends on '.', '!' or '?'
        immediately followed by whitespace in *text*. If no such boundary
        exists, the plain *max_chars*-character prefix is returned.

    A terminator only counts when whitespace follows it, so a period inside
    a decimal ("3.14"), a version number, or a file name is not mistaken
    for the end of a sentence. Abbreviations such as "Dr. " still match.
    """
    if len(text) <= max_chars:
        return text
    truncated = text[:max_chars]
    # Walk back from the end of the window to find the last real sentence end.
    # `text` is strictly longer than `truncated`, so text[end + 1] always
    # exists; checking it (not `truncated`) lets a terminator sitting on the
    # very last kept character qualify. Index 0 is deliberately excluded so a
    # lone leading terminator never becomes the whole result.
    for end in range(len(truncated) - 1, 0, -1):
        if truncated[end] in ".!?" and text[end + 1].isspace():
            return truncated[:end + 1]
    # No sentence boundary inside the window: fall back to the hard cut.
    return truncated
 # -------------------------
 # Ask question
 # -------------------------
@@ -585,7 +600,8 @@ def ask_question(question, show_sources=False, filter_term=None):
            print(chunk[:300])
        print("--- End chunks ---\n")
-    context = " ".join(top_chunks)[:level_cfg["context_len"]]
+    # Trim the joined chunks at a sentence boundary instead of hard-slicing at context_len characters.
    context = truncate_at_sentence(" ".join(top_chunks), level_cfg["context_len"])
    # Build conversation history string
    history_text = ""
@@ -604,7 +620,7 @@ def ask_question(question, show_sources=False, filter_term=None):
            f"Do not reference outside sources. "
            f"Do not repeat or echo the conversation history in your answer. "
            f"Do not include labels, separator lines, or notes in your answer. "
-            f"Stop immediately after answering.\n\n"
+            f"Stop immediately after answering, ending on a complete sentence."
        )
    if history_text: