LangGraph, похоже, игнорирует выходные данные инструмента с моделями Gemini.Python

Программы на Python
Ответить
Anonymous
 LangGraph, похоже, игнорирует выходные данные инструмента с моделями Gemini.

Сообщение Anonymous »

У меня возникла проблема: LLM корректно вызывает инструмент (вызов проходит успешно), но, по-видимому, модель не может интерпретировать или обработать результат, который возвращает инструмент.
Вот код, воспроизводящий проблему:
import asyncio
import base64
import os
from pathlib import Path
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage, SystemMessage
from langchain.chat_models import init_chat_model
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import MessagesState
from langgraph.prebuilt import ToolNode

# ============== CONFIGURATION ==============
# Model name and provider identifier for the chat model.
LLM_MODEL = "gemini-2.5-flash"
LLM_MODEL_PROVIDER = "google_genai"
# Read from the environment; None when GEMINI_API_KEY is not set.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# The sample document is looked up next to this script.
SCRIPT_DIR = Path(__file__).parent
TEST_FILE = SCRIPT_DIR.joinpath("test.pdf")

# ============== TOOLS ==============
@tool
def get_client(company_name: str) -> list[dict[str, str]]:
    """
    Search for a client by name.
    Returns matching clients or empty if not found.
    """
    # NOTE: @tool uses the docstring above as the tool description shown to
    # the model, so its wording is intentionally left unchanged.
    #
    # Stubbed lookup: the same single record is returned regardless of
    # company_name. The return annotation now matches the value actually
    # returned (a list of records, not a str).
    return [{"id": "f4764430-5add-446f-b35b-a9c4e272c27c", "name": "Apple"}]

# Tool registry: bound to the LLM via bind_tools() and executed by ToolNode.
TOOLS = [get_client]

# ============== STATE ==============
class AgentState(MessagesState):
    """Graph state that adds nothing beyond what MessagesState provides."""

# ============== SYSTEM PROMPT ==============
# Instructions sent to the model as the SystemMessage: find the client name,
# call get_client with it, then decide whether a returned record matches.
SYSTEM_PROMPT = """You are an order entry assistant. Extract client information from documents.

TASK:
1. Look at the provided document/text and find the client/customer name
2. Call get_client with the client name you found
3. Report the result (client name and its ID)

IMPORTANT:
- Read tool responses carefully
- The tool returns a list of similar companies. Evaluate the results:
- SUCCESS: If any result has a company name that is clearly the same entity (ignore minor formatting like spaces, periods, capitalization), that IS a match. Use that result's `id` and report success.
- FAILURE: Only if the list is empty OR the names are completely different companies.
"""

# ============== LLM ==============
# Chat model built from the configured model name, provider and API key.
llm = init_chat_model(
    LLM_MODEL,
    model_provider=LLM_MODEL_PROVIDER,
    google_api_key=GEMINI_API_KEY,
)
# Same model with TOOLS bound, so it can respond with tool calls.
llm_with_tools = llm.bind_tools(TOOLS)

# ============== NODES ==============
async def process_node(state: AgentState) -> AgentState:
    """Invoke the tool-enabled LLM on the conversation so far.

    Prints each tool call the model requested, then returns the model's
    reply as a one-element ``messages`` update.
    """
    reply = await llm_with_tools.ainvoke(state["messages"])

    # An empty/absent tool_calls value simply yields no output.
    for call in reply.tool_calls or ():
        print(f" - {call['name']}: {call['args']}")

    return {"messages": [reply]}

# ============== ROUTING ==============
def route_tools(state: AgentState) -> str:
    """Choose the next hop after the LLM step.

    Returns END when the state holds no messages, "tools" when the latest
    message carries tool calls, and "finalize" otherwise.
    """
    history = state.get("messages", [])
    if not history:
        return END

    # Messages without a tool_calls attribute are treated like messages
    # whose tool_calls is empty.
    pending_calls = getattr(history[-1], "tool_calls", None)
    if not pending_calls:
        print(" -> Routing to: finalize")
        return "finalize"

    print(" -> Routing to: tools")
    return "tools"

# ============== BUILD GRAPH ==============
def build_graph():
    """Build and compile the agent graph.

    Flow: START -> "process" -> route_tools -> "tools" -> "process" -> ...
    The loop ends when route_tools picks "finalize" (or END).

    Returns:
        The compiled graph produced by ``StateGraph.compile()``.
    """
    builder = StateGraph(AgentState)

    builder.add_node("process", process_node)
    builder.add_node("tools", ToolNode(TOOLS))
    builder.add_edge(START, "process")
    builder.add_conditional_edges(
        "process",
        route_tools,
        {
            "tools": "tools",
            "finalize": END,
            # route_tools returns END itself when the state has no messages;
            # every value the router can return needs an entry here.
            END: END,
        },
    )
    # Tool results go back to the LLM so it can read them and answer.
    builder.add_edge("tools", "process")

    return builder.compile()

# ============== FILE READING ==============
def read_file_as_base64(filepath: Path) -> tuple[str, str]:
    """Load a file and return ``(base64_text, mime_type)``.

    The MIME type is sniffed from the leading bytes (PDF, JPEG, PNG).
    Content matching none of these signatures is reported as
    ``application/pdf``.
    """
    with open(filepath, "rb") as handle:
        raw = handle.read()

    # Checked in order; the first matching signature wins.
    signatures = (
        (b"%PDF", "application/pdf"),
        (b"\xff\xd8\xff", "image/jpeg"),
        (b"\x89PNG", "image/png"),
    )
    mime_type = next(
        (mime for magic, mime in signatures if raw.startswith(magic)),
        "application/pdf",  # default when no signature matches
    )

    return base64.b64encode(raw).decode("utf-8"), mime_type

def create_file_message(filepath: Path) -> HumanMessage:
    """Wrap a file in a two-part HumanMessage.

    The first part is a text instruction naming the file; the second is a
    base64 "file" part carrying the file content and its detected MIME type.
    """
    encoded, mime = read_file_as_base64(filepath)

    instruction_part = {
        "type": "text",
        "text": f"Process this document and find the client: {filepath.name}",
    }
    # Every detected file kind is sent the same way: as a base64 "file"
    # block tagged with its MIME type.
    file_part = {
        "type": "file",
        "source_type": "base64",
        "mime_type": mime,
        "data": encoded,
    }

    return HumanMessage(content=[instruction_part, file_part])

# ============== MAIN ==============
async def run_agent(
    filepath: Path | None = None,
    text_input: str | None = None,
):
    """Run the agent on a file or, failing that, on plain text.

    The two inputs are alternatives, not combined: when ``filepath`` points
    to an existing file, ``text_input`` is ignored.

    Args:
        filepath: Document to send to the model; used only if it exists.
        text_input: Prompt used when no existing file was supplied.

    Returns:
        The result of ``graph.ainvoke``, or ``None`` when neither an
        existing file nor a text prompt was provided.
    """
    graph = build_graph()

    # The system prompt always comes first, followed by exactly one input.
    messages = [SystemMessage(content=SYSTEM_PROMPT)]

    if filepath and filepath.exists():
        print(f"Reading file: {filepath}")
        messages.append(create_file_message(filepath))
    elif text_input:
        messages.append(HumanMessage(content=text_input))
    else:
        print("No input provided!")
        return

    initial_state = {"messages": messages}

    # recursion_limit bounds how many steps the process/tools loop may take.
    config = {"configurable": {"thread_id": "debug-1"}, "recursion_limit": 20}
    result = await graph.ainvoke(initial_state, config=config)
    return result

async def main():
    """Run the agent on TEST_FILE when it exists, else on a text prompt."""
    if not TEST_FILE.exists():
        # No sample document available: fall back to a plain-text request.
        print(f"File not found: {TEST_FILE}")
        print("Using text input instead...\n")
        await run_agent(text_input="Find the id of the client Apple")
        return

    await run_agent(filepath=TEST_FILE)


if __name__ == "__main__":
    asyncio.run(main())

Проблема, судя по всему, связана с передачей файла: при вызове с текстовым вводом всё работает. Я не могу приложить сам PDF-файл, но test.pdf содержит только следующее:
Изображение

Агент правильно прочитал документ и определил Apple как целевую компанию. Тем не менее, он не может выполнить сопоставление с выходными данными инструмента. Моя среда (в Windows):
langchain==1.2.0
langchain-core==1.2.4
langchain-google-genai==4.1.2
langgraph==1.0.5
google-genai==1.56.0


Подробнее здесь: https://stackoverflow.com/questions/798 ... ini-models
Ответить

Быстрый ответ

Изменение регистра текста: 
Смайлики
:) :( :oops: :roll: :wink: :muza: :clever: :sorry: :angel: :read: *x)
Ещё смайлики…
   
К этому ответу прикреплено по крайней мере одно вложение.

Если вы не хотите добавлять вложения, оставьте поля пустыми.

Максимально разрешённый размер вложения: 15 МБ.

Вернуться в «Python»