Skip to content

Commit 2becd9c

Browse files
committed
+examples/docqa/extract-then-chat.py
1 parent 77bd84e commit 2becd9c

File tree

3 files changed

+489
-0
lines changed

3 files changed

+489
-0
lines changed

examples/docqa/books.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
2+
Book Title: Crime and Redemption by Filidor Dostoyevski, released in 1877, offers a
3+
riveting exploration of guilt, morality, and the possibility of spiritual rebirth.
4+
Set against the bleak backdrop of 19th century Russia, it follows the tormented journey
5+
of Rodion Romanovich Raskolnikov, a young man driven to murder and subsequently
6+
haunted by his actions. Through Raskolnikov's story, Dostoyevski delves deep into the
7+
human psyche, presenting a timeless narrative of human imperfection and the
8+
redemptive power.
9+
10+
Book Title: The Siblings Karamazoff by Fyodar Dostoyevskiy, published in 1881,
11+
weaves a complex narrative around the ethical battles and spiritual dilemmas
12+
faced by the Karamazoff family. Set in the heart of Russia, it explores themes of faith,
13+
doubt, and the nature of free will through the intersecting lives of three brothers,
14+
each embodying different facets of humanity. Dostoyevskiy masterfully crafts a tale of
15+
familial bonds, existential questioning, and the search for truth in a morally ambiguous
16+
world.
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
"""
2+
3-Agent system to first extract a few pieces of info, then chat with user.
3+
4+
- Assistant: helps user answer questions about a Book. But first it needs to
5+
extract some information from a document about the Book, using Extractor.
6+
- Extractor: generates questions about the Book document, one by one,
7+
then returns all info to Assistant using a tool message.
8+
- DocAgent: answers the questions generated by Extractor, based on the Book doc.
9+
10+
Run like this:
11+
12+
python3 examples/docqa/extract-then-chat.py
13+
14+
"""
15+
from langroid import ChatDocument
16+
from pydantic import BaseModel
17+
from typing import List
18+
import os
19+
from fire import Fire
20+
21+
from rich import print
22+
from rich.prompt import Prompt
23+
import langroid as lr
24+
import langroid.language_models as lm
25+
from langroid.mytypes import Entity
26+
from langroid.agent.special.doc_chat_agent import DocChatAgent, DocChatAgentConfig
27+
from langroid.parsing.parser import ParsingConfig
28+
from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig
29+
from langroid.agent.task import Task
30+
from langroid.agent.tool_message import ToolMessage
31+
from langroid.utils.configuration import set_global, Settings
32+
from langroid.utils.constants import NO_ANSWER, DONE, SEND_TO, PASS
33+
34+
from textwrap import dedent
35+
from dotenv import load_dotenv
36+
37+
# Set the TOKENIZERS_PARALLELISM environment variable to "false" for this process.
# NOTE(review): presumably read by the HuggingFace tokenizers library used by the
# sentence-transformer embeddings, to silence its fork/parallelism warning — confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
38+
39+
40+
class BookInfo(BaseModel):
    """Structured record describing a single book."""

    title: str  # title of the book
    author: str  # name of the author
    year: int  # year, stored as an integer
44+
45+
46+
class BookInfoTool(ToolMessage):
    """Tool message that carries a list of `BookInfo` records."""

    request: str = "book_info"
    purpose: str = "Collect <info> about Books"

    # The collected records, one entry per book.
    info: List[BookInfo]

    def handle(self) -> str:
        """Exit the task and pass this tool to the parent.

        Returns:
            The `DONE` and `PASS` markers joined by a single space.
        """
        return f"{DONE} {PASS}"

    @classmethod
    def examples(cls) -> List["BookInfoTool"]:
        """Return a one-element list with a sample instance holding two books."""
        sample_books = [
            BookInfo(title="The Hobbit", author="J.R.R. Tolkien", year=1937),
            BookInfo(
                title="The Great Gatsby",
                author="F. Scott Fitzgerald",
                year=1925,
            ),
        ]
        return [cls(info=sample_books)]
70+
71+
72+
class Assistant(ChatAgent):
    """Chat agent that turns a received `book_info` tool into a message for its LLM."""

    def book_info(self, msg: BookInfoTool) -> str:
        """Render the extracted book info as plain text addressed to the LLM.

        Args:
            msg: tool message whose `info` list is rendered as text.

        Returns:
            A string that starts with the `SEND_TO` prefix followed by `LLM`,
            then instructions and the rendered info.
        """
        # Swap curly braces for square brackets so the rendered info is
        # NON-JSON and does not look like a tool.
        braces_to_brackets = str.maketrans("{}", "[]")
        rendered = str(msg.info).translate(braces_to_brackets)
        # The routing prefix makes the Assistant's LLM respond to this, not the user.
        # Message text is kept flush-left so no indentation ends up in it.
        return f"""{SEND_TO}LLM
Below is INFO about various books, you received from the Extractor.
Now ask the user what help they need, and respond ONLY based on this INFO.

INFO:
{rendered}
"""
84+
85+
86+
class Extractor(ChatAgent):
    """Chat agent whose fallback handler reminds the LLM to use `book_info`."""

    def handle_message_fallback(
        self, msg: str | ChatDocument
    ) -> str | ChatDocument | None:
        """Nudge the LLM when it fails to use `book_info` correctly.

        Args:
            msg: the message that reached the fallback handler.

        Returns:
            A reminder string when `has_tool_message_attempt(msg)` is truthy,
            otherwise None.
        """
        if not self.has_tool_message_attempt(msg):
            return None
        # Reminder text is kept flush-left so no indentation ends up in it.
        return """
You must use the "book_info" tool to present the info.
You either forgot to use it, or you used it with the wrong format.
Make sure all fields are filled out and pay attention to the
required types of the fields.
"""
98+
99+
100+
def chat(
    model: str = "",  # or, e.g., "ollama/mistral:7b-instruct-v0.2-q8_0"
    debug: bool = False,
    no_cache: bool = False,  # whether to disable using cached LLM responses
):
    """Run the 3-agent demo: extract book info from a document, then chat.

    The Assistant task gets the Extractor task as a sub-task, and the
    Extractor task gets the DocAgent task as a sub-task; the Assistant task
    is then run.

    Args:
        model: chat model name; when empty, `lm.OpenAIChatModel.GPT4_TURBO`
            is used.
        debug: forwarded to the global `Settings`.
        no_cache: when True, the global `cache` setting is switched off.
    """
    # Greeting text is kept flush-left so no indentation ends up in it.
    print(
        """
Hello! I am your book info helper.
First I will get info about some books
"""
    )

    load_dotenv()

    # Caching stays on unless no_cache was requested.
    use_cache = not no_cache
    set_global(Settings(debug=debug, cache=use_cache))

    llm_config = lm.OpenAIGPTConfig(
        # or, e.g. "ollama/mistral:7b-instruct-v0.2-q8_0" but result may be brittle
        chat_model=model or lm.OpenAIChatModel.GPT4_TURBO,
        chat_context_length=16_000,  # adjust based on model
    )

    # ---- DocAgent: answers questions based on the book document ----
    chunking = ParsingConfig(
        chunk_size=50,
        overlap=10,
        n_similar_docs=3,
        n_neighbor_ids=4,
    )
    embedder = lr.embedding_models.SentenceTransformerEmbeddingsConfig(
        model_type="sentence-transformer",
        model_name="BAAI/bge-large-en-v1.5",
    )
    vector_store = lr.vector_store.LanceDBConfig(
        collection_name="book_info",
        replace_collection=True,
        storage_path=".lancedb/data/",
        embedding=embedder,
    )
    doc_agent = DocChatAgent(
        DocChatAgentConfig(
            llm=llm_config,
            n_neighbor_chunks=2,
            parsing=chunking,
            vecdb=vector_store,
            cross_encoder_reranking_model="",
        )
    )
    doc_agent.ingest_doc_paths(["examples/docqa/books.txt"])
    # No system_message is given here, since it would override the doc chat
    # agent's default system message.
    doc_task = Task(
        doc_agent,
        name="DocAgent",
        done_if_no_response=[Entity.LLM],  # done if null response from LLM
        done_if_response=[Entity.LLM],  # done if non-null response from LLM
    )

    # ---- Extractor: asks questions, then reports via the book_info tool ----
    extractor = Extractor(ChatAgentConfig(llm=llm_config, vecdb=None))
    extractor.enable_message(BookInfoTool)

    # Prompt text is kept flush-left so no indentation ends up in it.
    extractor_prompt = f"""
You are an expert at understanding JSON function/tool specifications.
You must extract information about various books from a document,
to finally present the info using the `book_info` tool/function,
but you do not have access to the document.
I can help with your questions about the document.
You have to ask questions in these steps:
1. ask which books are in the document
2. for each book, ask the various pieces of info you need.

If I am unable to answer your question initially, try asking differently,
and if I am still unable to answer after 3 tries,
fill in {NO_ANSWER} for that field.
Think step by step.

Do not explain yourself, or say any extraneous things.
When you receive the answer, then ask for the next field, and so on.
"""
    extractor_task = Task(
        extractor,
        name="Extractor",
        interactive=False,  # set to True to slow it down (hit enter to progress)
        system_message=extractor_prompt,
    )

    # ---- Assistant: talks to the user ----
    assistant = Assistant(ChatAgentConfig(llm=llm_config, vecdb=None))
    assistant.enable_message(lr.agent.tools.RecipientTool)
    # The assistant may HANDLE the book_info tool but must not USE it.
    assistant.enable_message(BookInfoTool, use=False, handle=True)

    # Prompt text is kept flush-left so no indentation ends up in it.
    assistant_prompt = """
You are a helpful librarian, answering my (the user) questions about
books described in a certain document, and you do NOT know which
books are in the document.

FIRST you need to ask the "Extractor" to collect information
about various books that are in a certain document. Address your request to the
Extractor using the 'recipient_message' tool/function.

Once you receive the information, you should then ask me (the user)
what I need help with.
"""
    assistant_task = Task(
        assistant,
        name="Assistant",
        interactive=True,
        system_message=assistant_prompt,
    )

    # Task hierarchy: Assistant -> Extractor -> DocAgent.
    assistant_task.add_sub_task([extractor_task])
    extractor_task.add_sub_task([doc_task])

    # run() is required rather than run_async(): DocChatAgent does not have
    # an async llm_response method.
    assistant_task.run()
223+
224+
225+
if __name__ == "__main__":
    # Script entry point: hand chat() to Fire so that its parameters
    # (model, debug, no_cache) can be supplied on the command line.
    Fire(chat)

0 commit comments

Comments
 (0)