Reorganize the corpus. Prevent answers from being truncated mid-sentence.
This commit is contained in:
36
Chartwell.py
36
Chartwell.py
@@ -119,14 +119,14 @@ conversation_history = []
|
|||||||
LEVELS = {
|
LEVELS = {
|
||||||
1: {"expand": False, "top_k": 1, "max_tokens": 75, "context_len": 500},
|
1: {"expand": False, "top_k": 1, "max_tokens": 75, "context_len": 500},
|
||||||
2: {"expand": False, "top_k": 1, "max_tokens": 75, "context_len": 600},
|
2: {"expand": False, "top_k": 1, "max_tokens": 75, "context_len": 600},
|
||||||
3: {"expand": False, "top_k": 2, "max_tokens": 100, "context_len": 700},
|
3: {"expand": False, "top_k": 2, "max_tokens": 100*3, "context_len": 700},
|
||||||
4: {"expand": False, "top_k": 2, "max_tokens": 100, "context_len": 800},
|
4: {"expand": False, "top_k": 2, "max_tokens": 100*3, "context_len": 800},
|
||||||
5: {"expand": False, "top_k": 3, "max_tokens": 125, "context_len": 1000},
|
5: {"expand": False, "top_k": 3, "max_tokens": 125*3, "context_len": 1000},
|
||||||
6: {"expand": False, "top_k": 3, "max_tokens": 150, "context_len": 1200},
|
6: {"expand": False, "top_k": 3, "max_tokens": 150*3, "context_len": 1200},
|
||||||
7: {"expand": True, "top_k": 3, "max_tokens": 150, "context_len": 1400},
|
7: {"expand": True, "top_k": 3, "max_tokens": 150*3, "context_len": 1400},
|
||||||
8: {"expand": True, "top_k": 4, "max_tokens": 175, "context_len": 1600},
|
8: {"expand": True, "top_k": 4, "max_tokens": 175*3, "context_len": 1600},
|
||||||
9: {"expand": True, "top_k": 5, "max_tokens": 175, "context_len": 1800},
|
9: {"expand": True, "top_k": 5, "max_tokens": 175*3, "context_len": 1800},
|
||||||
10: {"expand": True, "top_k": 5, "max_tokens": 200, "context_len": 2000},
|
10: {"expand": True, "top_k": 5, "max_tokens": 200*3, "context_len": 2000},
|
||||||
}
|
}
|
||||||
|
|
||||||
# -------------------------
|
# -------------------------
|
||||||
@@ -569,6 +569,21 @@ def parse_input(user_input):
|
|||||||
return question, filter_term
|
return question, filter_term
|
||||||
return user_input, SEARCH_FILTER
|
return user_input, SEARCH_FILTER
|
||||||
|
|
||||||
|
# --------------------------
|
||||||
|
# Truncate context at a sentence boundary to avoid feeding the LLM incomplete fragments
|
||||||
|
# -----------------------------
|
||||||
|
def truncate_at_sentence(text, max_chars):
    """Return at most ``max_chars`` characters of ``text``, ending on a sentence.

    If ``text`` already fits, it is returned unchanged.  Otherwise the text is
    cut to ``max_chars`` characters and then shortened to the last sentence
    terminator ('.', '!' or '?') inside that window that is immediately
    followed by whitespace in the full text.  Requiring the whitespace keeps
    dots inside tokens such as "3.14" or "example.com" from being mistaken
    for the end of a sentence.

    When no such boundary exists, the hard cut at ``max_chars`` is returned so
    the caller still receives as much context as the budget allows.

    Args:
        text: The string to shorten.
        max_chars: Maximum number of characters to keep.  Zero or a negative
            value yields an empty string.

    Returns:
        The (possibly shortened) string, never longer than ``max_chars``.
    """
    # A negative bound would otherwise slice characters off the *end* of the
    # text (text[:-n]) instead of limiting its length.
    if max_chars <= 0:
        return ""

    if len(text) <= max_chars:
        return text

    window = text[:max_chars]

    # Scan backwards for the last real sentence end.  Index 0 is skipped on
    # purpose: a terminator there would leave nothing but the punctuation.
    # text[idx + 1] always exists because len(text) > max_chars >= idx + 1.
    for idx in range(len(window) - 1, 0, -1):
        if window[idx] in ".!?" and text[idx + 1].isspace():
            return window[:idx + 1]

    # No usable sentence boundary inside the window: fall back to the hard cut.
    return window
|
||||||
|
|
||||||
|
|
||||||
# -------------------------
|
# -------------------------
|
||||||
# Ask question
|
# Ask question
|
||||||
# -------------------------
|
# -------------------------
|
||||||
@@ -585,7 +600,8 @@ def ask_question(question, show_sources=False, filter_term=None):
|
|||||||
print(chunk[:300])
|
print(chunk[:300])
|
||||||
print("--- End chunks ---\n")
|
print("--- End chunks ---\n")
|
||||||
|
|
||||||
context = " ".join(top_chunks)[:level_cfg["context_len"]]
|
# context = " ".join(top_chunks)[:level_cfg["context_len"]]
|
||||||
|
context = truncate_at_sentence(" ".join(top_chunks), level_cfg["context_len"])
|
||||||
|
|
||||||
# Build conversation history string
|
# Build conversation history string
|
||||||
history_text = ""
|
history_text = ""
|
||||||
@@ -604,7 +620,7 @@ def ask_question(question, show_sources=False, filter_term=None):
|
|||||||
f"Do not reference outside sources. "
|
f"Do not reference outside sources. "
|
||||||
f"Do not repeat or echo the conversation history in your answer. "
|
f"Do not repeat or echo the conversation history in your answer. "
|
||||||
f"Do not include labels, separator lines, or notes in your answer. "
|
f"Do not include labels, separator lines, or notes in your answer. "
|
||||||
f"Stop immediately after answering.\n\n"
|
f"Stop immediately after answering, ending on a complete sentence."
|
||||||
)
|
)
|
||||||
|
|
||||||
if history_text:
|
if history_text:
|
||||||
|
|||||||
Reference in New Issue
Block a user