Skip to content

Commit 2cac4bb

Browse files
committed
update examples
1 parent b6ef4c0 commit 2cac4bb

File tree

4 files changed

+415
-1
lines changed

4 files changed

+415
-1
lines changed

examples/basic/1d-screen-click.py

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
"""
2+
3+
A Bit-Shooter Game played on a 1-dimensional binary screen.
4+
5+
Give an LLM Agent access to a 1-dimensional "screen" represented
6+
as a string of bits (0s and 1s), e.g. "101010",
7+
and equip it with a "Click tool" (like a mouse click) that allows it to
8+
click on a bit -- clicking the bit causes it to flip.
9+
10+
The Agent plays a "Bit Shooter" game where the goal is to get rid of all
11+
1s in the "screen".
12+
13+
To use the Click tool, the Agent must specify the position (zero-based)
14+
where it wants to click. This causes the bit to flip.
15+
The LLM is then presented with the new state of the screen,
16+
and the process repeats until all 1s are gone.
17+
18+
Clearly the Agent (LLM) needs to be able to accurately count the bit positions,
19+
to be able to correctly click on the 1s.
20+
21+
Run like this (--model is optional, defaults to GPT4o):
22+
23+
python3 examples/basic/1d-screen-click.py --model litellm/anthropic/claude-3-5-sonnet-20241022
24+
25+
At the beginning you get to specify the initial state of the screen:
26+
- size of the screen (how many bits)
27+
- the (0-based) locations of the 1s (SPACE-separated) in the screen.
28+
29+
E.g. try this:
30+
- size = 50,
31+
- 1-indices: 0 20 30 40
32+
33+
The loop is set to run in interactive mode (to prevent runaway loops),
34+
so you have to keep hitting enter to see the LLM's next move.
35+
36+
The main observation is that when you run it with claude-3.5-sonnet,
37+
the accuracy of the Agent's clicks is far superior to other LLMs like GPT-4o
38+
and even GPT-4.
39+
40+
To try with other LLMs, you can set the --model param to, for example:
41+
- gpt-4 (set OPENAI_API_KEY in your env or .env file)
42+
- gpt-4o (ditto, set OPENAI_API_KEY)
43+
- groq/llama-3.1-70b-versatile (set GROQ_API_KEY in your env or .env file)
44+
- cerebras/llama3.1-70b (set CEREBRAS_API_KEY in your env or .env file)
45+
- ollama/qwen2.5-coder:latest
46+
47+
See here for a full guide on local/open LLM setup with Langroid:
48+
https://langroid.github.io/langroid/tutorials/local-llm-setup/
49+
And here for how to use with other non-OpenAI LLMs:
50+
https://langroid.github.io/langroid/tutorials/non-openai-llms/
51+
"""
52+
53+
from typing import List, Tuple
54+
55+
import langroid as lr
56+
import langroid.language_models as lm
57+
from langroid.agent.tools.orchestration import AgentDoneTool
58+
from langroid.pydantic_v1 import BaseModel
59+
from langroid.utils.globals import GlobalState
60+
from rich.prompt import Prompt
61+
import fire
62+
63+
64+
class ScreenState(BaseModel):
    """
    Represents the state of the 1-dimensional binary screen
    """

    screen: str | None = None  # binary string, e.g. "101010"

    def __init__(
        self,
        one_indices: List[int] | None = None,
        size: int = 1,
    ):
        """
        Build a screen of `size` bits that is all 0s except at `one_indices`.

        Args:
            one_indices: zero-based positions to set to 1. Positions outside
                [0, size) are ignored. When omitted, [1] is used (which, with
                the default size of 1, yields the screen "0").
            size: number of bits in the screen.
        """
        super().__init__()
        # None stands in for the former literal default `[1]`, so that no
        # single list object is shared between calls.
        if one_indices is None:
            one_indices = [1]

        # Start from all zeros, then set the requested in-range bits.
        screen_list = ["0"] * size
        for idx in one_indices:
            if 0 <= idx < size:
                screen_list[idx] = "1"

        self.screen = "".join(screen_list)

    @classmethod
    def set_state(
        cls,
        one_indices: List[int],
        size: int,
    ) -> "ScreenState":
        """
        Factory method: create the initial state, store it in
        GlobalScreenState under `state`, and return it.

        Args:
            one_indices: zero-based positions of the 1s.
            size: number of bits in the screen.

        Returns:
            The newly created ScreenState (the same object that was stored).
        """
        initial_state = cls(
            one_indices=one_indices,
            size=size,
        )
        GlobalScreenState.set_values(state=initial_state)
        # The signature promises a ScreenState, so hand the new state back
        # instead of implicitly returning None.
        return initial_state

    def flip(self, i: int) -> None:
        """
        Flip the i-th bit ("0" <-> "1").

        Does nothing when the screen is unset or `i` is outside the screen.
        """
        if self.screen is None or not 0 <= i < len(self.screen):
            return

        flipped = "1" if self.screen[i] == "0" else "0"
        self.screen = self.screen[:i] + flipped + self.screen[i + 1 :]
113+
114+
115+
class GlobalScreenState(GlobalState):
    # Holds the ScreenState shared by the tool handler and the main script;
    # it is written via `set_values(state=...)` and read via
    # `get_value("state")` elsewhere in this file.
    state: ScreenState = ScreenState()
117+
118+
119+
def get_state() -> ScreenState:
    """Fetch the `state` value currently held by GlobalScreenState."""
    current_state = GlobalScreenState.get_value("state")
    return current_state
121+
122+
123+
class ClickTool(lr.ToolMessage):
    # Tool the LLM emits to "click" on one bit of the screen; handling it
    # flips that bit in the state returned by get_state().
    request: str = "click_tool"
    purpose: str = """
    To click at <position> on the 1-dimensional binary screen,
    which causes the bit at that position to FLIP.
    IMPORTANT: the position numbering starts from 0!!!
    """

    position: int  # zero-based index of the bit to click

    @classmethod
    def examples(cls) -> List[lr.ToolMessage | Tuple[str, lr.ToolMessage]]:
        """Sample usages of this tool: one bare, one paired with a lead-in text."""
        bare_example = cls(position=3)
        explained_example = ("I want to click at position 5", cls(position=5))
        return [bare_example, explained_example]

    def handle(self) -> str | AgentDoneTool:
        """
        Flip the clicked bit, print the new screen, and report the outcome.

        Returns:
            The updated screen string while it still contains a "1";
            otherwise an AgentDoneTool.
        """
        screen_state = get_state()
        screen_state.flip(self.position)
        bits = screen_state.screen
        print("SCREEN STATE = ", bits)
        if "1" in bits:
            return bits
        return AgentDoneTool()
150+
151+
152+
def main(model: str = ""):
    """
    Create the "Clicker" agent, enable ClickTool on it, and run the task.

    Args:
        model: chat model name; an empty string selects
            lm.OpenAIChatModel.GPT4o.
    """
    click_tool_name = ClickTool.default_value("request")
    chosen_model = model or lm.OpenAIChatModel.GPT4o
    agent_config = lr.ChatAgentConfig(
        name="Clicker",
        llm=lm.OpenAIGPTConfig(chat_model=chosen_model),
        use_functions_api=False,  # suppress OpenAI functions/tools
        use_tools=True,  # enable langroid-native tools: works with any LLM
        show_stats=False,
        system_message=f"""
            You are an expert at COMPUTER USE.
            In this task you only have to be able to understand a 1-dimensional
            screen presented to you as a string of bits (0s and 1s).
            You will play a 1-dimensional BIT-shooter game!

            Your task is to CLICK ON THE LEFTMOST 1 in the bit-string,
            to flip it to a 0.

            Always try to click on the LEFTMOST 1 in the bit-sequence.

            To CLICK on the screen you
            must use the TOOL `{click_tool_name}` where the
            `position` field specifies the position (zero-based) to click.
            If you CORRECTLY click on a 1, the bit at that position will be
            turned to 0.
            But if you click on a 0, it will turn into a 1,
            taking you further from your goal.

            So you MUST ACCURATELY specify the position of the LEFTMOST 1 to click,
            making SURE there is a 1 at that position.
            In other words, it is critical that you are able to ACCURATELY COUNT
            the bit positions so that you are able to correctly identify the position
            of the LEFTMOST 1 bit in the "screen" given to you as a string of bits.
            """,
    )
    agent = lr.ChatAgent(agent_config)
    agent.enable_message(ClickTool)

    task = lr.Task(agent, interactive=True, only_user_quits_root=False)

    # The opening message is the screen state that the __main__ section
    # stored before calling this function.
    task.run(get_state())
198+
199+
200+
if __name__ == "__main__":
    # Ask the user for the initial screen before starting the agent loop.
    size = int(Prompt.ask("Size of screen (how many bits)"))
    # split() with no separator ignores repeated, leading and trailing
    # whitespace and yields [] for empty input, so int() never sees "".
    # (split(" ") produced empty tokens in those cases and crashed.)
    ones = [int(x) for x in Prompt.ask("Indices of 1s (SPACE-separated)").split()]
    ScreenState.set_state(ones, size)
    print("SCREEN STATE = ", get_state().screen)
    fire.Fire(main)

examples/basic/chat-persist.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
Variant of chat.py, showing how you can save conversation state, end the script
2+
Variant of chat.py, showing how you can save conversation state, end the script, and
33
resume the conversation later by re-running the script.
44
55
The most basic chatbot example, using the default settings.
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
"""
2+
Agent-loop to classify the intent of a given text.
3+
4+
Run like this (--model is optional, defaults to GPT4o):
5+
6+
python3 examples/basic/intent-classifier.py --model groq/llama-3.1-8b-instant
7+
8+
Other ways to specify the model:
9+
- gpt-4 (set OPENAI_API_KEY in your env or .env file)
10+
- gpt-4o (ditto, set OPENAI_API_KEY)
11+
- cerebras/llama3.1-70b (set CEREBRAS_API_KEY)
12+
13+
For more ways to use langroid with other LLMs, see:
14+
- local/open LLMs: https://langroid.github.io/langroid/tutorials/local-llm-setup/
15+
- non-OpenAI LLMs: https://langroid.github.io/langroid/tutorials/non-openai-llms/
16+
"""
17+
18+
from typing import List, Tuple
19+
20+
import langroid as lr
21+
from langroid.agent.tools.orchestration import ResultTool
22+
import langroid.language_models as lm
23+
from enum import Enum
24+
from rich.prompt import Prompt
25+
from fire import Fire
26+
27+
28+
class Intent(str, Enum):
    # The four intent labels a text can be classified into. The `str` mixin
    # makes each member an instance of str carrying its lowercase value.
    GREETING = "greeting"
    FAREWELL = "farewell"
    QUESTION = "question"
    STATEMENT = "statement"
33+
34+
35+
class IntentTool(lr.ToolMessage):
    # Tool through which the LLM reports the intent it assigned to a text.
    request: str = "intent_tool"
    purpose: str = """
    To classify the <intent> of a given text, into one of:
    - greeting
    - farewell
    - question
    - statement
    """

    intent: Intent  # the label chosen by the LLM

    @classmethod
    def examples(cls) -> List[lr.ToolMessage | Tuple[str, lr.ToolMessage]]:
        """Few-shot examples of this tool: one bare, one with a lead-in text."""
        greeting_example = cls(intent=Intent.GREETING)
        question_example = (
            "I want to classify this as a question",
            cls(intent=Intent.QUESTION),
        )
        return [greeting_example, question_example]

    def handle(self) -> ResultTool:
        """Wrap the chosen intent in a ResultTool, which ends the task."""
        outcome = ResultTool(intent=self.intent)
        return outcome

    def handle_message_fallback(self, message: lr.ChatDocument) -> str | None:
        """
        Remind the LLM to use the tool when it replied without one.

        Returns:
            The reminder text when `message` is a ChatDocument sent by the
            LLM; otherwise None.
        """
        # NOTE(review): confirm that langroid invokes this hook when it is
        # defined on a ToolMessage subclass rather than on the agent.
        if not isinstance(message, lr.ChatDocument):
            return None
        if message.metadata.sender != lr.Entity.LLM:
            return None
        return """
            You forgot to use the `intent_tool` to classify the intent.
            """
70+
71+
72+
def main(model: str = ""):
    """
    Repeatedly prompt for a text and print the intent the agent assigns to it.

    Args:
        model: chat model name; an empty string selects
            lm.OpenAIChatModel.GPT4o.
    """
    intent_tool_name = IntentTool.default_value("request")
    llm_config = lm.OpenAIGPTConfig(chat_model=model or lm.OpenAIChatModel.GPT4o)
    agent_config = lr.ChatAgentConfig(
        name="Intent",
        llm=llm_config,
        use_functions_api=False,  # suppress OpenAI functions/tools
        use_tools=True,  # use langroid-native tools: works with ANY LLM
        system_message=f"""
            You are an astute INTENT CLASSIFIER: given any piece of text
            from the user, you are able to smartly infer their intent.
            Given such a piece of text, classify its intent into one of the following:
            - greeting
            - farewell
            - question
            - statement
            To present your classification, use the
            `{intent_tool_name}` tool.

            ALWAYS use this tool to respond, do NOT say anything else.
            """,
    )
    agent = lr.ChatAgent(agent_config)
    agent.enable_message(IntentTool)

    # Task loop specialized (via the [Intent] subscript) to return an Intent
    classifier_task = lr.Task(agent=agent, interactive=False)[Intent]

    # The loop has no exit condition of its own.
    while True:
        text = Prompt.ask("Enter a text to classify its intent")
        request = f"""
            Please classify the intent of this text, present your answer
            using the `{intent_tool_name}` tool:
            ----
            {text}
            ----
            """
        detected = classifier_task.run(request)
        print(f"Intent: {detected}")
114+
115+
116+
if __name__ == "__main__":
    # Hand main() to python-fire so its `model` parameter can be supplied on
    # the command line (e.g. --model, as shown in the module docstring).
    Fire(main)

0 commit comments

Comments
 (0)