Reorganize the corpus. Prevent answers from being truncated mid-sentence.
This commit is contained in:
36
Chartwell.py
36
Chartwell.py
@@ -119,14 +119,14 @@ conversation_history = []
|
||||
# Per-level retrieval/generation budgets, keyed by difficulty level 1-10.
#   expand      - whether to expand the query before retrieval
#   top_k       - number of retrieved chunks fed into the context
#   max_tokens  - generation budget; levels 3-10 were tripled (e.g. 100 -> 300)
#                 so answers are not cut off mid-sentence
#   context_len - character budget for the assembled context string
LEVELS = {
    1: {"expand": False, "top_k": 1, "max_tokens": 75, "context_len": 500},
    2: {"expand": False, "top_k": 1, "max_tokens": 75, "context_len": 600},
    3: {"expand": False, "top_k": 2, "max_tokens": 300, "context_len": 700},
    4: {"expand": False, "top_k": 2, "max_tokens": 300, "context_len": 800},
    5: {"expand": False, "top_k": 3, "max_tokens": 375, "context_len": 1000},
    6: {"expand": False, "top_k": 3, "max_tokens": 450, "context_len": 1200},
    7: {"expand": True, "top_k": 3, "max_tokens": 450, "context_len": 1400},
    8: {"expand": True, "top_k": 4, "max_tokens": 525, "context_len": 1600},
    9: {"expand": True, "top_k": 5, "max_tokens": 525, "context_len": 1800},
    10: {"expand": True, "top_k": 5, "max_tokens": 600, "context_len": 2000},
}
|
||||
|
||||
# -------------------------
|
||||
@@ -569,6 +569,21 @@ def parse_input(user_input):
|
||||
return question, filter_term
|
||||
return user_input, SEARCH_FILTER
|
||||
|
||||
# --------------------------
|
||||
# Truncate context at a sentence boundary to avoid feeding the LLM incomplete fragments
|
||||
# -----------------------------
|
||||
def truncate_at_sentence(text, max_chars):
    """Clip *text* to at most *max_chars* characters, ending on a full sentence.

    Text that already fits is returned unchanged.  Otherwise the text is
    cut at *max_chars* and then shortened to the last sentence terminator
    ('.', '!' or '?') within the cut; if no terminator appears past the
    first character, the raw cut is returned as a best-effort fallback.
    """
    if len(text) <= max_chars:
        return text
    clipped = text[:max_chars]
    # Position of the last sentence-ending punctuation mark (-1 if absent).
    cut = max(clipped.rfind(mark) for mark in ".!?")
    if cut > 0:
        return clipped[:cut + 1]
    return clipped
|
||||
|
||||
|
||||
# -------------------------
|
||||
# Ask question
|
||||
# -------------------------
|
||||
@@ -585,7 +600,8 @@ def ask_question(question, show_sources=False, filter_term=None):
|
||||
print(chunk[:300])
|
||||
print("--- End chunks ---\n")
|
||||
|
||||
context = " ".join(top_chunks)[:level_cfg["context_len"]]
|
||||
# context = " ".join(top_chunks)[:level_cfg["context_len"]]
|
||||
context = truncate_at_sentence(" ".join(top_chunks), level_cfg["context_len"])
|
||||
|
||||
# Build conversation history string
|
||||
history_text = ""
|
||||
@@ -604,7 +620,7 @@ def ask_question(question, show_sources=False, filter_term=None):
|
||||
f"Do not reference outside sources. "
|
||||
f"Do not repeat or echo the conversation history in your answer. "
|
||||
f"Do not include labels, separator lines, or notes in your answer. "
|
||||
f"Stop immediately after answering.\n\n"
|
||||
f"Stop immediately after answering, ending on a complete sentence."
|
||||
)
|
||||
|
||||
if history_text:
|
||||
|
||||
Reference in New Issue
Block a user