最核心的部分:
agent.py
from __future__ import annotations
import inspect
import logging
from datetime import datetime
from typing import TYPE_CHECKING, Optional
import sentry_sdk
from forge.agent.base import BaseAgent, BaseAgentConfiguration, BaseAgentSettings
from forge.agent.protocols import (
AfterExecute,
AfterParse,
CommandProvider,
DirectiveProvider,
MessageProvider,
)
from forge.command.command import Command, CommandOutput
from forge.components.action_history import (
ActionHistoryComponent,
EpisodicActionHistory,
)
from forge.components.code_executor.code_executor import CodeExecutorComponent
from forge.components.context.context import AgentContext, ContextComponent
from forge.components.file_manager import FileManagerComponent
from forge.components.git_operations import GitOperationsComponent
from forge.components.image_gen import ImageGeneratorComponent
from forge.components.system import SystemComponent
from forge.components.user_interaction import UserInteractionComponent
from forge.components.watchdog import WatchdogComponent
from forge.components.web import WebSearchComponent, WebSeleniumComponent
from forge.file_storage.base import FileStorage
from forge.llm.prompting.schema import ChatPrompt
from forge.llm.prompting.utils import dump_prompt
from forge.llm.providers import (
AssistantFunctionCall,
ChatMessage,
ChatModelProvider,
ChatModelResponse,
)
from forge.llm.providers.utils import function_specs_from_commands
from forge.models.action import (
ActionErrorResult,
ActionInterruptedByHuman,
ActionResult,
ActionSuccessResult,
)
from forge.models.config import Configurable
from forge.utils.exceptions import (
AgentException,
AgentTerminated,
CommandExecutionError,
UnknownCommandError,
)
from pydantic import Field
from autogpt.app.log_cycle import (
CURRENT_CONTEXT_FILE_NAME,
NEXT_ACTION_FILE_NAME,
USER_INPUT_FILE_NAME,
LogCycleHandler,
)
from .prompt_strategies.one_shot import (
OneShotAgentActionProposal,
OneShotAgentPromptStrategy,
)
# Config is only needed for type annotations; importing it lazily avoids a
# circular import at runtime.
if TYPE_CHECKING:
    from forge.config.config import Config

logger = logging.getLogger(__name__)
class AgentConfiguration(BaseAgentConfiguration):
    """Configuration for `Agent`; currently identical to `BaseAgentConfiguration`."""
# Persistent settings + state for `Agent`. NOTE: this is a pydantic model;
# field declaration order and defaults are part of its serialized form.
class AgentSettings(BaseAgentSettings):
    config: AgentConfiguration = Field(default_factory=AgentConfiguration)

    history: EpisodicActionHistory[OneShotAgentActionProposal] = Field(
        default_factory=EpisodicActionHistory[OneShotAgentActionProposal]
    )
    """(STATE) The action history of the agent."""

    # (STATE) Context items; consumed by the ContextComponent (see Agent.__init__).
    context: AgentContext = Field(default_factory=AgentContext)
# The main AutoGPT agent: wires up all components and drives the
# propose -> (execute | do_not_execute) cycle.
#
# NOTE: deliberately no class docstring — `default_settings` below reads
# `__doc__`, which must resolve to the *module* docstring; a class docstring
# would shadow it inside the class body.
class Agent(BaseAgent, Configurable[AgentSettings]):
    default_settings: AgentSettings = AgentSettings(
        name="Agent",
        description=__doc__ if __doc__ else "",
    )

    def __init__(
        self,
        settings: AgentSettings,
        llm_provider: ChatModelProvider,
        file_storage: FileStorage,
        legacy_config: Config,
    ):
        super().__init__(settings)

        self.llm_provider = llm_provider
        self.ai_profile = settings.ai_profile
        self.directives = settings.directives

        # Copy the shared default config so per-agent tweaks don't leak between
        # instances.
        prompt_config = OneShotAgentPromptStrategy.default_configuration.copy(deep=True)
        prompt_config.use_functions_api = (
            settings.config.use_functions_api
            # Anthropic currently doesn't support tools + prefilling :(
            and self.llm.provider_name != "anthropic"
        )
        self.prompt_strategy = OneShotAgentPromptStrategy(prompt_config, logger)
        self.commands: list[Command] = []

        # Components. Relative execution order is tuned via .run_after(...);
        # each component contributes directives/commands/messages through the
        # protocol pipelines used in propose_action/execute below.
        self.system = SystemComponent(legacy_config, settings.ai_profile)
        self.history = ActionHistoryComponent(
            settings.history,
            self.send_token_limit,
            lambda x: self.llm_provider.count_tokens(x, self.llm.name),
            legacy_config,
            llm_provider,
        ).run_after(WatchdogComponent)
        self.user_interaction = UserInteractionComponent(legacy_config)
        self.file_manager = FileManagerComponent(settings, file_storage)
        self.code_executor = CodeExecutorComponent(
            self.file_manager.workspace,
            settings,
            legacy_config,
        )
        self.git_ops = GitOperationsComponent(legacy_config)
        self.image_gen = ImageGeneratorComponent(
            self.file_manager.workspace, legacy_config
        )
        self.web_search = WebSearchComponent(legacy_config)
        self.web_selenium = WebSeleniumComponent(legacy_config, llm_provider, self.llm)
        self.context = ContextComponent(self.file_manager.workspace, settings.context)
        self.watchdog = WatchdogComponent(settings.config, settings.history).run_after(
            ContextComponent
        )

        self.created_at = datetime.now().strftime("%Y%m%d_%H%M%S")
        """Timestamp the agent was created; only used for structured debug logging."""

        self.log_cycle_handler = LogCycleHandler()
        """LogCycleHandler for structured debug logging."""

        self.event_history = settings.history
        self.legacy_config = legacy_config

    async def propose_action(self) -> OneShotAgentActionProposal:
        """Proposes the next action to execute, based on the task and current state.

        Returns:
            The command name and arguments, if any, and the agent's thoughts.
        """
        self.reset_trace()

        # Get directives: merge the agent's stored directives with whatever the
        # components contribute for this cycle.
        resources = await self.run_pipeline(DirectiveProvider.get_resources)
        constraints = await self.run_pipeline(DirectiveProvider.get_constraints)
        best_practices = await self.run_pipeline(DirectiveProvider.get_best_practices)
        directives = self.state.directives.copy(deep=True)
        directives.resources += resources
        directives.constraints += constraints
        directives.best_practices += best_practices

        # Get commands
        self.commands = await self.run_pipeline(CommandProvider.get_commands)
        self._remove_disabled_commands()

        # Get messages
        messages = await self.run_pipeline(MessageProvider.get_messages)

        prompt: ChatPrompt = self.prompt_strategy.build_prompt(
            messages=messages,
            task=self.state.task,
            ai_profile=self.state.ai_profile,
            ai_directives=directives,
            commands=function_specs_from_commands(self.commands),
            include_os_info=self.legacy_config.execute_local_commands,
        )

        # Structured debug logging of the full prompt for this cycle.
        self.log_cycle_handler.log_count_within_cycle = 0
        self.log_cycle_handler.log_cycle(
            self.state.ai_profile.ai_name,
            self.created_at,
            self.config.cycle_count,
            prompt.raw(),
            CURRENT_CONTEXT_FILE_NAME,
        )
        logger.debug(f"Executing prompt:\n{dump_prompt(prompt)}")

        output = await self.complete_and_parse(prompt)
        self.config.cycle_count += 1

        return output

    async def complete_and_parse(
        self, prompt: ChatPrompt, exception: Optional[Exception] = None
    ) -> OneShotAgentActionProposal:
        """Run the prompt through the LLM and parse the reply into a proposal.

        Args:
            prompt: The fully-built chat prompt for this cycle.
            exception: If given (e.g. on a retry), it is appended to the prompt
                as a system error message before completion.

        Returns:
            The parsed action proposal.
        """
        if exception:
            prompt.messages.append(ChatMessage.system(f"Error: {exception}"))

        response: ChatModelResponse[
            OneShotAgentActionProposal
        ] = await self.llm_provider.create_chat_completion(
            prompt.messages,
            model_name=self.llm.name,
            completion_parser=self.prompt_strategy.parse_response_content,
            functions=prompt.functions,
            prefill_response=prompt.prefill_response,
        )
        result = response.parsed_result

        self.log_cycle_handler.log_cycle(
            self.state.ai_profile.ai_name,
            self.created_at,
            self.config.cycle_count,
            result.thoughts.dict(),
            NEXT_ACTION_FILE_NAME,
        )

        await self.run_pipeline(AfterParse.after_parse, result)

        return result

    async def execute(
        self,
        proposal: OneShotAgentActionProposal,
        user_feedback: str = "",
    ) -> ActionResult:
        """Execute the tool selected in `proposal` and return the result.

        NOTE(review): `user_feedback` is not read in this method — presumably
        kept for interface symmetry with `do_not_execute`; confirm with callers.
        """
        tool = proposal.use_tool

        # Get commands (refreshed, since available commands can change between
        # proposal and execution).
        self.commands = await self.run_pipeline(CommandProvider.get_commands)
        self._remove_disabled_commands()

        try:
            return_value = await self._execute_tool(tool)
            result = ActionSuccessResult(outputs=return_value)
        except AgentTerminated:
            # Deliberate shutdown signal; let it propagate to the caller.
            raise
        except AgentException as e:
            result = ActionErrorResult.from_exception(e)
            logger.warning(f"{tool} raised an error: {e}")
            sentry_sdk.capture_exception(e)

        # Cap oversized command output so it can't crowd out the next prompt.
        result_tlength = self.llm_provider.count_tokens(str(result), self.llm.name)
        if result_tlength > self.send_token_limit // 3:
            result = ActionErrorResult(
                reason=f"Command {tool.name} returned too much output. "
                "Do not execute this command again with the same arguments."
            )

        await self.run_pipeline(AfterExecute.after_execute, result)

        logger.debug("\n".join(self.trace))

        return result

    async def do_not_execute(
        self, denied_proposal: OneShotAgentActionProposal, user_feedback: str
    ) -> ActionResult:
        """Record that the user denied `denied_proposal`, returning the feedback
        as an `ActionInterruptedByHuman` result (the proposal is not executed)."""
        result = ActionInterruptedByHuman(feedback=user_feedback)
        self.log_cycle_handler.log_cycle(
            self.state.ai_profile.ai_name,
            self.created_at,
            self.config.cycle_count,
            user_feedback,
            USER_INPUT_FILE_NAME,
        )

        await self.run_pipeline(AfterExecute.after_execute, result)

        logger.debug("\n".join(self.trace))

        return result

    async def _execute_tool(self, tool_call: AssistantFunctionCall) -> CommandOutput:
        """Execute the command and return the result

        Args:
            tool_call (AssistantFunctionCall): The tool call to execute

        Returns:
            CommandOutput: The execution result (awaited if the command
                returned an awaitable)

        Raises:
            UnknownCommandError: If no command matches the tool call's name.
            AgentException: Re-raised unchanged from the command itself.
            CommandExecutionError: Wrapping any other exception the command raised.
        """
        # Execute a native command with the same name or alias, if it exists
        command = self._get_command(tool_call.name)
        try:
            result = command(**tool_call.arguments)
            # Commands may be sync or async; normalize to a plain value.
            if inspect.isawaitable(result):
                return await result
            return result
        except AgentException:
            raise
        except Exception as e:
            raise CommandExecutionError(str(e))

    def _get_command(self, command_name: str) -> Command:
        """Look up a command by name or alias; later-registered commands win
        (the list is scanned in reverse)."""
        for command in reversed(self.commands):
            if command_name in command.names:
                return command

        raise UnknownCommandError(
            f"Cannot execute command '{command_name}': unknown command."
        )

    def _remove_disabled_commands(self) -> None:
        """Drop any command that has at least one name listed in
        `legacy_config.disabled_commands`."""
        self.commands = [
            command
            for command in self.commands
            if not any(
                name in self.legacy_config.disabled_commands for name in command.names
            )
        ]

    def find_obscured_commands(self) -> list[Command]:
        """Return commands that are unreachable because every one of their
        names/aliases is claimed by a later command (later commands take
        precedence, see `_get_command`). Result preserves original order."""
        seen_names = set()
        obscured_commands = []
        # Traverse in reverse order, because later commands take precedence.
        for command in reversed(self.commands):
            # If all of the command's names have been seen, it's obscured
            if seen_names.issuperset(command.names):
                obscured_commands.append(command)
            else:
                seen_names.update(command.names)
        return list(reversed(obscured_commands))
这段代码是一个助理 AI 代理(Agent)的实现。以下是代码的功能概述:
- 导入必要的模块和类,包括日志记录、时间处理、类型检查等。
- 定义了AgentConfiguration、AgentSettings和Agent类,用于配置助理代理的设置和行为。
- 使用了用于提供指令、命令、消息等内容的 Provider 协议(DirectiveProvider、CommandProvider、MessageProvider 等)。
- 实现了用于执行代码、文件管理、图片生成、web搜索等功能的组件。
- 定义了"propose_action"方法,用于提出下一步的操作建议。
- 包含了对话提示、日志处理、异常处理等模块。
- 实现了"execute"和"do_not_execute"方法,用于执行或终止操作建议。
- 包含了辅助方法用于执行工具调用、找到可执行命令等功能。
- 使用Pydantic进行数据验证和模型定义。
- 包含了一些辅助方法和变量,用于监控操作、管理上下文等。
总体而言,这些代码构建了一个助理AI系统,用于提出决策建议、执行操作,并与用户进行交互。
另外一个核心部分是利用 GPT-4 等类似的 LLM 进行任务编排,代码在:
one_shot.py
from __future__ import annotations
import json
import platform
import re
from logging import Logger
import distro
from forge.config.ai_directives import AIDirectives
from forge.config.ai_profile import AIProfile
from forge.json.parsing import extract_dict_from_json
from forge.llm.prompting import ChatPrompt, LanguageModelClassification, PromptStrategy
from forge.llm.prompting.utils import format_numbered_list
from forge.llm.providers.schema import (
AssistantChatMessage,
ChatMessage,
CompletionModelFunction,
)
from forge.models.action import ActionProposal
from forge.models.config import SystemConfiguration, UserConfigurable
from forge.models.json_schema import JSONSchema
from forge.models.utils import ModelWithSummary
from forge.utils.exceptions import InvalidAgentResponseError
from pydantic import Field
_RESPONSE_INTERFACE_NAME = "AssistantResponse"
# Structured "thoughts" the LLM must emit alongside its tool call.
# NOTE: no class docstring on purpose — pydantic would include it in .schema(),
# which is rendered verbatim into the prompt. The `description=` strings below
# ARE shown to the model, and field declaration order defines the JSON key order.
class AssistantThoughts(ModelWithSummary):
    observations: str = Field(
        ..., description="Relevant observations from your last action (if any)"
    )
    text: str = Field(..., description="Thoughts")
    reasoning: str = Field(..., description="Reasoning behind the thoughts")
    self_criticism: str = Field(..., description="Constructive self-criticism")
    plan: list[str] = Field(
        ..., description="Short list that conveys the long-term plan"
    )
    speak: str = Field(..., description="Summary of thoughts, to say to user")

    def summary(self) -> str:
        # The free-form "text" field doubles as the one-line summary.
        return self.text
# Complete response model the LLM must produce: thoughts + the `use_tool`
# call inherited from ActionProposal. (No docstring — it would leak into the
# pydantic schema shown to the model.)
class OneShotAgentActionProposal(ActionProposal):
    thoughts: AssistantThoughts
# User-configurable text templates for the one-shot prompt strategy.
# The template strings are runtime prompt content — edit with care.
class OneShotAgentPromptConfiguration(SystemConfiguration):
    # Skeleton of the system prompt body; the placeholders are filled in
    # OneShotAgentPromptStrategy.build_system_prompt.
    DEFAULT_BODY_TEMPLATE: str = (
        "## Constraints\n"
        "You operate within the following constraints:\n"
        "{constraints}\n"
        "\n"
        "## Resources\n"
        "You can leverage access to the following resources:\n"
        "{resources}\n"
        "\n"
        "## Commands\n"
        "These are the ONLY commands you can use."
        " Any action you perform must be possible through one of these commands:\n"
        "{commands}\n"
        "\n"
        "## Best practices\n"
        "{best_practices}"
    )

    # Final user message of every cycle, asking the model to pick one command.
    DEFAULT_CHOOSE_ACTION_INSTRUCTION: str = (
        "Determine exactly one command to use next based on the given goals "
        "and the progress you have made so far, "
        "and respond using the JSON schema specified previously:"
    )

    body_template: str = UserConfigurable(default=DEFAULT_BODY_TEMPLATE)
    choose_action_instruction: str = UserConfigurable(
        default=DEFAULT_CHOOSE_ACTION_INSTRUCTION
    )
    # When True, commands are passed via the provider's native functions/tools
    # API instead of (only) being listed in the prompt body.
    use_functions_api: bool = UserConfigurable(default=False)

    #########
    # State #
    #########
    # progress_summaries: dict[tuple[int, int], str] = Field(
    #     default_factory=lambda: {(0, 0): ""}
    # )
class OneShotAgentPromptStrategy(PromptStrategy):
    """Builds the single-shot ReAct-style prompt (system prompt + task +
    history + final instruction) and parses the LLM's JSON reply into an
    `OneShotAgentActionProposal`."""

    # Shared default config; Agent deep-copies this before applying
    # per-instance tweaks (see Agent.__init__).
    default_configuration: OneShotAgentPromptConfiguration = (
        OneShotAgentPromptConfiguration()
    )

    def __init__(
        self,
        configuration: OneShotAgentPromptConfiguration,
        logger: Logger,
    ):
        self.config = configuration
        # JSON schema of the full response model; rendered into the prompt and
        # used to derive the response prefill.
        self.response_schema = JSONSchema.from_dict(OneShotAgentActionProposal.schema())
        self.logger = logger

    @property
    def model_classification(self) -> LanguageModelClassification:
        return LanguageModelClassification.FAST_MODEL  # FIXME: dynamic switching

    def build_prompt(
        self,
        *,
        messages: list[ChatMessage],
        task: str,
        ai_profile: AIProfile,
        ai_directives: AIDirectives,
        commands: list[CompletionModelFunction],
        include_os_info: bool,
        **extras,
    ) -> ChatPrompt:
        """Constructs and returns a prompt with the following structure:
        1. System prompt
        2. The task (in triple quotes), then any intermediate `messages`
        3. `choose_action_instruction` as the final user message
        """
        system_prompt, response_prefill = self.build_system_prompt(
            ai_profile=ai_profile,
            ai_directives=ai_directives,
            commands=commands,
            include_os_info=include_os_info,
        )

        final_instruction_msg = ChatMessage.user(self.config.choose_action_instruction)

        return ChatPrompt(
            messages=[
                ChatMessage.system(system_prompt),
                ChatMessage.user(f'"""{task}"""'),
                *messages,
                final_instruction_msg,
            ],
            prefill_response=response_prefill,
            # Only pass native function specs when the functions API is enabled;
            # otherwise the commands are already listed in the prompt body.
            functions=commands if self.config.use_functions_api else [],
        )

    def build_system_prompt(
        self,
        ai_profile: AIProfile,
        ai_directives: AIDirectives,
        commands: list[CompletionModelFunction],
        include_os_info: bool,
    ) -> tuple[str, str]:
        """
        Builds the system prompt.

        Returns:
            str: The system prompt body
            str: The desired start for the LLM's response; used to steer the output
        """
        response_fmt_instruction, response_prefill = self.response_format_instruction(
            self.config.use_functions_api
        )
        system_prompt_parts = (
            self._generate_intro_prompt(ai_profile)
            + (self._generate_os_info() if include_os_info else [])
            + [
                self.config.body_template.format(
                    constraints=format_numbered_list(
                        ai_directives.constraints
                        + self._generate_budget_constraint(ai_profile.api_budget)
                    ),
                    resources=format_numbered_list(ai_directives.resources),
                    commands=self._generate_commands_list(commands),
                    best_practices=format_numbered_list(ai_directives.best_practices),
                )
            ]
            + [
                "## Your Task\n"
                "The user will specify a task for you to execute, in triple quotes,"
                " in the next message. Your job is to complete the task while following"
                " your directives as given above, and terminate when your task is done."
            ]
            + ["## RESPONSE FORMAT\n" + response_fmt_instruction]
        )

        # Join non-empty parts together into paragraph format
        return (
            "\n\n".join(filter(None, system_prompt_parts)).strip("\n"),
            response_prefill,
        )

    def response_format_instruction(self, use_functions_api: bool) -> tuple[str, str]:
        """Render the response-format section of the system prompt.

        Returns:
            str: The JSON response format instruction (TypeScript-style interface).
            str: The prefill used to steer the model into starting a JSON object.
        """
        response_schema = self.response_schema.copy(deep=True)
        # When the functions API carries the tool call, `use_tool` is removed
        # from the JSON the model must produce.
        if (
            use_functions_api
            and response_schema.properties
            and "use_tool" in response_schema.properties
        ):
            del response_schema.properties["use_tool"]

        # Unindent for performance: a flatter schema rendering costs fewer
        # prompt tokens.
        response_format = re.sub(
            r"\n\s+",
            "\n",
            response_schema.to_typescript_object_interface(_RESPONSE_INTERFACE_NAME),
        )
        # Prefill opens the JSON object with its first schema key, steering the
        # model toward the required shape.
        response_prefill = f'{{\n "{list(response_schema.properties.keys())[0]}":'

        return (
            (
                f"YOU MUST ALWAYS RESPOND WITH A JSON OBJECT OF THE FOLLOWING TYPE:\n"
                f"{response_format}"
                + ("\n\nYOU MUST ALSO INVOKE A TOOL!" if use_functions_api else "")
            ),
            response_prefill,
        )

    def _generate_intro_prompt(self, ai_profile: AIProfile) -> list[str]:
        """Generates the introduction part of the prompt.

        Returns:
            list[str]: A list of strings forming the introduction part of the prompt.
        """
        return [
            f"You are {ai_profile.ai_name}, {ai_profile.ai_role.rstrip('.')}.",
            "Your decisions must always be made independently without seeking "
            "user assistance. Play to your strengths as an LLM and pursue "
            "simple strategies with no legal complications.",
        ]

    def _generate_os_info(self) -> list[str]:
        """Generates the OS information part of the prompt.

        Returns:
            list[str]: The OS information part of the prompt.
        """
        os_name = platform.system()
        # distro gives a friendlier name than platform.platform() on Linux.
        os_info = (
            platform.platform(terse=True)
            if os_name != "Linux"
            else distro.name(pretty=True)
        )
        return [f"The OS you are running on is: {os_info}"]

    def _generate_budget_constraint(self, api_budget: float) -> list[str]:
        """Generates the budget information part of the prompt.

        Returns:
            list[str]: The budget information part of the prompt, or an empty list
                if no budget (<= 0) is set.
        """
        if api_budget > 0.0:
            return [
                f"It takes money to let you run. "
                f"Your API budget is ${api_budget:.3f}"
            ]
        return []

    def _generate_commands_list(self, commands: list[CompletionModelFunction]) -> str:
        """Lists the commands available to the agent.

        Params:
            commands: The command specs to list.

        Returns:
            str: A string containing a numbered list of commands.
        """
        try:
            return format_numbered_list([cmd.fmt_line() for cmd in commands])
        except AttributeError:
            self.logger.warning(f"Formatting commands failed. {commands}")
            raise

    def parse_response_content(
        self,
        response: AssistantChatMessage,
    ) -> OneShotAgentActionProposal:
        """Parse the LLM's reply into an action proposal.

        Raises:
            InvalidAgentResponseError: If the response has no text content, or
                if the functions API is enabled but no tool call was made.
        """
        if not response.content:
            raise InvalidAgentResponseError("Assistant response has no text content")

        self.logger.debug(
            "LLM response content:"
            + (
                f"\n{response.content}"
                if "\n" in response.content
                else f" '{response.content}'"
            )
        )
        assistant_reply_dict = extract_dict_from_json(response.content)
        self.logger.debug(
            "Parsing object extracted from LLM response:\n"
            f"{json.dumps(assistant_reply_dict, indent=4)}"
        )

        parsed_response = OneShotAgentActionProposal.parse_obj(assistant_reply_dict)

        if self.config.use_functions_api:
            if not response.tool_calls:
                raise InvalidAgentResponseError("Assistant did not use a tool")
            # The native tool call takes precedence over any `use_tool` in the JSON.
            parsed_response.use_tool = response.tool_calls[0].function
        return parsed_response
这段代码实现了一个助理AI系统,本质上是ReAct框架,主要包括以下功能:
- 定义了助理AI的思考模型AssistantThoughts,包括观察、思考、推理、自我批评、计划和口头表达等属性。
- 定义了OneShotAgentActionProposal类,用于存储助理AI提出的行动建议和思考。
- 实现了OneShotAgentPromptConfiguration和OneShotAgentPromptStrategy类,用于配置和构建助理AI的提示信息和操作策略。
- 使用Pydantic库进行数据验证和模型定义。
- 实现了各种辅助方法,用于生成系统提示、解析回复内容等功能。
- 包含了与JSON数据交互、日志记录、异常处理等相关模块和函数。
总体而言,这段代码涵盖了助理AI系统中与提示信息、响应解析、操作建议等方面相关的关键功能。