From cb7d906ef7769ff7c7f1266bf1dd6b05d0222b6d Mon Sep 17 00:00:00 2001 From: Sean Date: Mon, 6 Apr 2026 12:24:05 -0400 Subject: [PATCH] Reorganize the corpus. Prevent answers from being truncated mid-sentence. --- .../ANNETTE DAVIS Case.txt | 0 .../{Procedure => Condo}/Chapter 718-113.txt | 0 .../{Procedure => Condo}/Chapter 718-1265.txt | 0 .../{Procedure => Condo}/Chapter 718-303.txt | 0 .../{Procedure => Condo}/Chapter 718-501.txt | 0 ... Condo & Homeowner R - Dr. Joyce Starr.txt | 0 .../{Procedure => Condo}/Florida Probate.txt | 0 ...o, Plaintiffs, v. AKAM ASSOCIATES, INC.txt | 0 Chartwell.py | 36 +++++++++++++------ 9 files changed, 26 insertions(+), 10 deletions(-) rename Books/{Procedure => Condo}/ANNETTE DAVIS Case.txt (100%) rename Books/{Procedure => Condo}/Chapter 718-113.txt (100%) rename Books/{Procedure => Condo}/Chapter 718-1265.txt (100%) rename Books/{Procedure => Condo}/Chapter 718-303.txt (100%) rename Books/{Procedure => Condo}/Chapter 718-501.txt (100%) rename Books/{Procedure => Condo}/Defend Your Condo & Homeowner R - Dr. Joyce Starr.txt (100%) rename Books/{Procedure => Condo}/Florida Probate.txt (100%) rename Books/{Procedure => Condo}/Patrick HAYES and Carmen Pacheco, Plaintiffs, v. 
AKAM ASSOCIATES, INC.txt (100%) diff --git a/Books/Procedure/ANNETTE DAVIS Case.txt b/Books/Condo/ANNETTE DAVIS Case.txt similarity index 100% rename from Books/Procedure/ANNETTE DAVIS Case.txt rename to Books/Condo/ANNETTE DAVIS Case.txt diff --git a/Books/Procedure/Chapter 718-113.txt b/Books/Condo/Chapter 718-113.txt similarity index 100% rename from Books/Procedure/Chapter 718-113.txt rename to Books/Condo/Chapter 718-113.txt diff --git a/Books/Procedure/Chapter 718-1265.txt b/Books/Condo/Chapter 718-1265.txt similarity index 100% rename from Books/Procedure/Chapter 718-1265.txt rename to Books/Condo/Chapter 718-1265.txt diff --git a/Books/Procedure/Chapter 718-303.txt b/Books/Condo/Chapter 718-303.txt similarity index 100% rename from Books/Procedure/Chapter 718-303.txt rename to Books/Condo/Chapter 718-303.txt diff --git a/Books/Procedure/Chapter 718-501.txt b/Books/Condo/Chapter 718-501.txt similarity index 100% rename from Books/Procedure/Chapter 718-501.txt rename to Books/Condo/Chapter 718-501.txt diff --git a/Books/Procedure/Defend Your Condo & Homeowner R - Dr. Joyce Starr.txt b/Books/Condo/Defend Your Condo & Homeowner R - Dr. Joyce Starr.txt similarity index 100% rename from Books/Procedure/Defend Your Condo & Homeowner R - Dr. Joyce Starr.txt rename to Books/Condo/Defend Your Condo & Homeowner R - Dr. Joyce Starr.txt diff --git a/Books/Procedure/Florida Probate.txt b/Books/Condo/Florida Probate.txt similarity index 100% rename from Books/Procedure/Florida Probate.txt rename to Books/Condo/Florida Probate.txt diff --git a/Books/Procedure/Patrick HAYES and Carmen Pacheco, Plaintiffs, v. AKAM ASSOCIATES, INC.txt b/Books/Condo/Patrick HAYES and Carmen Pacheco, Plaintiffs, v. AKAM ASSOCIATES, INC.txt similarity index 100% rename from Books/Procedure/Patrick HAYES and Carmen Pacheco, Plaintiffs, v. AKAM ASSOCIATES, INC.txt rename to Books/Condo/Patrick HAYES and Carmen Pacheco, Plaintiffs, v. 
AKAM ASSOCIATES, INC.txt diff --git a/Chartwell.py b/Chartwell.py index f694bcd..e477966 100644 --- a/Chartwell.py +++ b/Chartwell.py @@ -119,14 +119,14 @@ conversation_history = [] LEVELS = { 1: {"expand": False, "top_k": 1, "max_tokens": 75, "context_len": 500}, 2: {"expand": False, "top_k": 1, "max_tokens": 75, "context_len": 600}, - 3: {"expand": False, "top_k": 2, "max_tokens": 100, "context_len": 700}, - 4: {"expand": False, "top_k": 2, "max_tokens": 100, "context_len": 800}, - 5: {"expand": False, "top_k": 3, "max_tokens": 125, "context_len": 1000}, - 6: {"expand": False, "top_k": 3, "max_tokens": 150, "context_len": 1200}, - 7: {"expand": True, "top_k": 3, "max_tokens": 150, "context_len": 1400}, - 8: {"expand": True, "top_k": 4, "max_tokens": 175, "context_len": 1600}, - 9: {"expand": True, "top_k": 5, "max_tokens": 175, "context_len": 1800}, - 10: {"expand": True, "top_k": 5, "max_tokens": 200, "context_len": 2000}, + 3: {"expand": False, "top_k": 2, "max_tokens": 100*3, "context_len": 700}, + 4: {"expand": False, "top_k": 2, "max_tokens": 100*3, "context_len": 800}, + 5: {"expand": False, "top_k": 3, "max_tokens": 125*3, "context_len": 1000}, + 6: {"expand": False, "top_k": 3, "max_tokens": 150*3, "context_len": 1200}, + 7: {"expand": True, "top_k": 3, "max_tokens": 150*3, "context_len": 1400}, + 8: {"expand": True, "top_k": 4, "max_tokens": 175*3, "context_len": 1600}, + 9: {"expand": True, "top_k": 5, "max_tokens": 175*3, "context_len": 1800}, + 10: {"expand": True, "top_k": 5, "max_tokens": 200*3, "context_len": 2000}, } # ------------------------- @@ -569,6 +569,21 @@ def parse_input(user_input): return question, filter_term return user_input, SEARCH_FILTER +# -------------------------- +# Truncate context at a sentence boundary to avoid feeding the LLM incomplete fragments +# ----------------------------- +def truncate_at_sentence(text, max_chars): + if len(text) <= max_chars: + return text + truncated = text[:max_chars] + last_period = max( + 
truncated.rfind('.'), + truncated.rfind('!'), + truncated.rfind('?') + ) + return truncated[:last_period + 1] if last_period > 0 else truncated + + # ------------------------- # Ask question # ------------------------- @@ -585,7 +600,8 @@ def ask_question(question, show_sources=False, filter_term=None): print(chunk[:300]) print("--- End chunks ---\n") - context = " ".join(top_chunks)[:level_cfg["context_len"]] +# context = " ".join(top_chunks)[:level_cfg["context_len"]] + context = truncate_at_sentence(" ".join(top_chunks), level_cfg["context_len"]) # Build conversation history string history_text = "" @@ -604,7 +620,7 @@ def ask_question(question, show_sources=False, filter_term=None): f"Do not reference outside sources. " f"Do not repeat or echo the conversation history in your answer. " f"Do not include labels, separator lines, or notes in your answer. " - f"Stop immediately after answering.\n\n" + f"Stop immediately after answering, ending on a complete sentence.\n\n" ) if history_text: