LangGraph, похоже, игнорирует выходные данные инструмента с моделями Gemini.

LangGraph, похоже, игнорирует выходные данные инструмента с моделями Gemini. ⇐ Python

1 сообщение • Страница 1 из 1

Anonymous

LangGraph, похоже, игнорирует выходные данные инструмента с моделями Gemini.

Цитата

Сообщение Anonymous » 02 янв 2026, 19:04

У меня возникла проблема: LLM правильно вызывает инструмент (вызов инструмента проходит успешно), но, по-видимому, модель не может интерпретировать или обработать результат работы инструмента.
Вот код, воспроизводящий проблему:
import asyncio
import base64
import os
from pathlib import Path
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage, SystemMessage
from langchain.chat_models import init_chat_model
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import MessagesState
from langgraph.prebuilt import ToolNode

# ============== CONFIGURATION ==============
# Model name and provider handed to init_chat_model further down in the script.
LLM_MODEL = "gemini-2.5-flash"
LLM_MODEL_PROVIDER = "google_genai"
# Read from the environment; os.environ.get yields None when the variable is unset.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

# Path to test file
# The test PDF is looked up next to this script, not in the current working directory.
SCRIPT_DIR = Path(__file__).parent
TEST_FILE = SCRIPT_DIR / "test.pdf"

# ============== TOOLS ==============
# Stub lookup used to reproduce the issue: ``company_name`` is accepted but not
# used, and the same single record is returned on every call.
# The return annotation reflects what is actually returned (a list of dicts with
# "id" and "name" keys), not ``str`` as previously declared.
# The docstring text is kept as-is because the @tool decorator may expose it to
# the model as the tool description.
@tool
def get_client(company_name: str) -> list[dict[str, str]]:
    """
    Search for a client by name.
    Returns matching clients or empty if not found.
    """
    return [{"id": "f4764430-5add-446f-b35b-a9c4e272c27c", "name": "Apple"}]


# Tools bound to the model and served by the graph's tool node.
TOOLS = [get_client]

# ============== STATE ==============
# Adds no fields of its own; everything comes from the MessagesState base class.
class AgentState(MessagesState):
    """Simple state with just messages."""

# ============== SYSTEM PROMPT ==============
# System instructions; run_agent places them first in the message list as a SystemMessage.
SYSTEM_PROMPT = """You are an order entry assistant. Extract client information from documents.

TASK:
1. Look at the provided document/text and find the client/customer name
2. Call get_client with the client name you found
3. Report the result (client name and its ID)

IMPORTANT:
- Read tool responses carefully
- The tool returns a list of similar companies. Evaluate the results:
- SUCCESS: If any result has a company name that is clearly the same entity (ignore minor formatting like spaces, periods, capitalization), that IS a match. Use that result's `id` and report success.
- FAILURE: Only if the list is empty OR the names are completely different companies.
"""

# ============== LLM ==============
# Chat model built at import time from the configuration constants above.
# NOTE(review): google_api_key is None when GEMINI_API_KEY is unset — confirm the
# provider then falls back to its own credential lookup.
llm = init_chat_model(
model=LLM_MODEL,
model_provider=LLM_MODEL_PROVIDER,
google_api_key=GEMINI_API_KEY,
)
# Same model with the TOOLS list bound; this is what process_node invokes.
llm_with_tools = llm.bind_tools(TOOLS)

# ============== NODES ==============
async def process_node(state: AgentState) -> AgentState:
    """Main processing node - calls LLM with tools.

    Sends the message history to the tool-bound model and prints every tool
    call the model requested, for debugging.

    Args:
        state: Current graph state; ``state["messages"]`` is passed to the
            model unchanged.

    Returns:
        A state update whose ``messages`` list contains only the new model
        reply.
    """
    result = await llm_with_tools.ainvoke(state["messages"])

    # Debug trace: one line per tool call requested by the model.
    if result.tool_calls:
        for tc in result.tool_calls:
            print(f" - {tc['name']}: {tc['args']}")

    return {"messages": [result]}

# ============== ROUTING ==============
def route_tools(state: AgentState) -> str:
    """Route to tools if there are tool calls, otherwise to finalize.

    Args:
        state: Current graph state; only its ``messages`` list is read.

    Returns:
        ``"tools"`` when the last message carries a non-empty ``tool_calls``
        attribute, otherwise ``"finalize"``. Both values are keys of the path
        map that ``build_graph`` passes to ``add_conditional_edges``.
    """
    messages = state.get("messages", [])
    if not messages:
        # Return the "finalize" key instead of END: the path map in
        # build_graph only knows "tools" and "finalize", so any other value
        # could not be resolved to a destination. "finalize" maps to END.
        return "finalize"

    last_message = messages[-1]
    if getattr(last_message, "tool_calls", None):
        print(" -> Routing to: tools")
        return "tools"

    print(" -> Routing to: finalize")
    return "finalize"

# ============== BUILD GRAPH ==============
def build_graph():
    """Build the agent graph.

    Topology: START -> process; process -> tools or END, as decided by
    ``route_tools``; tools -> process.

    Returns:
        The compiled graph.
    """
    builder = StateGraph(AgentState)

    tool_node = ToolNode(TOOLS)
    builder.add_node("process", process_node)
    builder.add_node("tools", tool_node)
    builder.add_edge(START, "process")
    # Keys are the values route_tools returns; values are the destinations.
    builder.add_conditional_edges(
        "process",
        route_tools,
        {
            "tools": "tools",
            "finalize": END,
        },
    )
    # After the tools run, control goes back to the model node.
    builder.add_edge("tools", "process")

    return builder.compile()

# ============== FILE READING ==============
def read_file_as_base64(filepath: Path) -> tuple[str, str]:
    """Read file and return (base64_content, mime_type).

    The MIME type is detected from the file's leading bytes; the file
    extension is not consulted.

    Args:
        filepath: Path of the file to read.

    Returns:
        A ``(base64_content, mime_type)`` tuple. ``mime_type`` is
        ``application/pdf``, ``image/jpeg`` or ``image/png``; content with an
        unrecognized signature is reported as ``application/pdf``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(filepath, "rb") as f:
        content = f.read()

    base64_content = base64.b64encode(content).decode("utf-8")

    # Known leading-byte signatures, checked in order.
    signatures = (
        (b"%PDF", "application/pdf"),
        (b"\xff\xd8\xff", "image/jpeg"),
        (b"\x89PNG", "image/png"),
    )
    mime_type = next(
        (mime for magic, mime in signatures if content.startswith(magic)),
        "application/pdf",  # default
    )

    return base64_content, mime_type

def create_file_message(filepath: Path) -> HumanMessage:
    """Create a HumanMessage with file content.

    Args:
        filepath: Path of the document to attach.

    Returns:
        A ``HumanMessage`` with two content parts: a text instruction that
        names the file, and the file itself as an inline base64 block.
    """
    base64_content, mime_type = read_file_as_base64(filepath)

    # The file is sent as a "file" content block carrying base64 data and the
    # detected MIME type; the same block shape is used for PDFs and images.
    content = [
        {"type": "text", "text": f"Process this document and find the client: {filepath.name}"},
        {"type": "file", "source_type": "base64", "mime_type": mime_type, "data": base64_content},
    ]

    return HumanMessage(content=content)

# ============== MAIN ==============
async def run_agent(filepath: Path | None = None, text_input: str | None = None):
    """Run the agent with a file or, failing that, text input.

    The two inputs are alternatives: when ``filepath`` points to an existing
    file it is used and ``text_input`` is ignored.

    Args:
        filepath: Document to send to the model; skipped when ``None`` or
            when the file does not exist.
        text_input: Plain-text request used when no file is available.

    Returns:
        The final graph state returned by ``graph.ainvoke``, or ``None`` when
        neither input is usable.
    """
    graph = build_graph()

    # Build messages
    messages = [SystemMessage(content=SYSTEM_PROMPT)]

    if filepath and filepath.exists():
        print(f"Reading file: {filepath}")
        messages.append(create_file_message(filepath))
    elif text_input:
        messages.append(HumanMessage(content=text_input))
    else:
        print("No input provided!")
        return

    initial_state = {"messages": messages}

    # Run graph
    # NOTE(review): build_graph compiles without a checkpointer, so thread_id
    # presumably has no effect here — confirm. recursion_limit bounds the
    # process <-> tools loop.
    config = {"configurable": {"thread_id": "debug-1"}, "recursion_limit": 20}
    result = await graph.ainvoke(initial_state, config=config)
    return result

async def main():
    """Script entry point: run the agent on the test file, else on a text query."""
    # Try file first, fall back to text
    if TEST_FILE.exists():
        await run_agent(filepath=TEST_FILE)
    else:
        print(f"File not found: {TEST_FILE}")
        print("Using text input instead...\n")
        await run_agent(text_input="Find the id of the client Apple")


if __name__ == "__main__":
    asyncio.run(main())

Проблема, судя по всему, связана с наличием файла: при вызове в текстовом режиме всё работает. Я не могу приложить сам PDF-файл, но test.pdf содержит всего лишь следующее:

Агент правильно прочитал документ и определил Apple как целевую компанию. Тем не менее, он не может выполнить сопоставление с выходными данными инструмента. Моя среда (в Windows):
langchain==1.2.0
langchain-core==1.2.4
langchain-google-genai==4.1.2
langgraph==1.0.5
google-genai==1.56.0

Подробнее здесь: https://stackoverflow.com/questions/79859269/langgraph-seems-to-ignore-tool-outputs-with-gemini-models

1767369878

Anonymous

У меня возникла проблема: LLM правильно вызывает инструмент (вызов инструмента проходит успешно), но, по-видимому, модель не может интерпретировать или обработать результат работы инструмента.
Вот код, воспроизводящий проблему:
import asyncio
import base64
import os
from pathlib import Path
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage, SystemMessage
from langchain.chat_models import init_chat_model
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import MessagesState
from langgraph.prebuilt import ToolNode

# ============== CONFIGURATION ==============
# Model name and provider handed to init_chat_model further down in the script.
LLM_MODEL = "gemini-2.5-flash"
LLM_MODEL_PROVIDER = "google_genai"
# Read from the environment; os.environ.get yields None when the variable is unset.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

# Path to test file
# The test PDF is looked up next to this script, not in the current working directory.
SCRIPT_DIR = Path(__file__).parent
TEST_FILE = SCRIPT_DIR / "test.pdf"

# ============== TOOLS ==============
# Stub lookup used to reproduce the issue: ``company_name`` is accepted but not
# used, and the same single record is returned on every call.
# The return annotation reflects what is actually returned (a list of dicts with
# "id" and "name" keys), not ``str`` as previously declared.
# The docstring text is kept as-is because the @tool decorator may expose it to
# the model as the tool description.
@tool
def get_client(company_name: str) -> list[dict[str, str]]:
    """
    Search for a client by name.
    Returns matching clients or empty if not found.
    """
    return [{"id": "f4764430-5add-446f-b35b-a9c4e272c27c", "name": "Apple"}]


# Tools bound to the model and served by the graph's tool node.
TOOLS = [get_client]

# ============== STATE ==============
# Adds no fields of its own; everything comes from the MessagesState base class.
class AgentState(MessagesState):
    """Simple state with just messages."""

# ============== SYSTEM PROMPT ==============
# System instructions; run_agent places them first in the message list as a SystemMessage.
SYSTEM_PROMPT = """You are an order entry assistant. Extract client information from documents.

TASK:
1. Look at the provided document/text and find the client/customer name
2. Call get_client with the client name you found
3. Report the result (client name and its ID)

IMPORTANT:
- Read tool responses carefully
- The tool returns a list of similar companies. Evaluate the results:
- SUCCESS: If any result has a company name that is clearly the same entity (ignore minor formatting like spaces, periods, capitalization), that IS a match.  Use that result's `id` and report success.
- FAILURE: Only if the list is empty OR the names are completely different companies.
"""

# ============== LLM ==============
# Chat model built at import time from the configuration constants above.
# NOTE(review): google_api_key is None when GEMINI_API_KEY is unset — confirm the
# provider then falls back to its own credential lookup.
llm = init_chat_model(
model=LLM_MODEL,
model_provider=LLM_MODEL_PROVIDER,
google_api_key=GEMINI_API_KEY,
)
# Same model with the TOOLS list bound; this is what process_node invokes.
llm_with_tools = llm.bind_tools(TOOLS)

# ============== NODES ==============
async def process_node(state: AgentState) -> AgentState:
    """Main processing node - calls LLM with tools.

    Sends the message history to the tool-bound model and prints every tool
    call the model requested, for debugging.

    Args:
        state: Current graph state; ``state["messages"]`` is passed to the
            model unchanged.

    Returns:
        A state update whose ``messages`` list contains only the new model
        reply.
    """
    result = await llm_with_tools.ainvoke(state["messages"])

    # Debug trace: one line per tool call requested by the model.
    if result.tool_calls:
        for tc in result.tool_calls:
            print(f"      - {tc['name']}: {tc['args']}")

    return {"messages": [result]}

# ============== ROUTING ==============
def route_tools(state: AgentState) -> str:
    """Route to tools if there are tool calls, otherwise to finalize.

    Args:
        state: Current graph state; only its ``messages`` list is read.

    Returns:
        ``"tools"`` when the last message carries a non-empty ``tool_calls``
        attribute, otherwise ``"finalize"``. Both values are keys of the path
        map that ``build_graph`` passes to ``add_conditional_edges``.
    """
    messages = state.get("messages", [])
    if not messages:
        # Return the "finalize" key instead of END: the path map in
        # build_graph only knows "tools" and "finalize", so any other value
        # could not be resolved to a destination. "finalize" maps to END.
        return "finalize"

    last_message = messages[-1]
    if getattr(last_message, "tool_calls", None):
        print("    -> Routing to: tools")
        return "tools"

    print("    -> Routing to: finalize")
    return "finalize"

# ============== BUILD GRAPH ==============
def build_graph():
    """Build the agent graph.

    Topology: START -> process; process -> tools or END, as decided by
    ``route_tools``; tools -> process.

    Returns:
        The compiled graph.
    """
    builder = StateGraph(AgentState)

    tool_node = ToolNode(TOOLS)
    builder.add_node("process", process_node)
    builder.add_node("tools", tool_node)
    builder.add_edge(START, "process")
    # Keys are the values route_tools returns; values are the destinations.
    builder.add_conditional_edges(
        "process",
        route_tools,
        {
            "tools": "tools",
            "finalize": END,
        },
    )
    # After the tools run, control goes back to the model node.
    builder.add_edge("tools", "process")

    return builder.compile()

# ============== FILE READING ==============
def read_file_as_base64(filepath: Path) -> tuple[str, str]:
    """Read file and return (base64_content, mime_type).

    The MIME type is detected from the file's leading bytes; the file
    extension is not consulted.

    Args:
        filepath: Path of the file to read.

    Returns:
        A ``(base64_content, mime_type)`` tuple. ``mime_type`` is
        ``application/pdf``, ``image/jpeg`` or ``image/png``; content with an
        unrecognized signature is reported as ``application/pdf``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(filepath, "rb") as f:
        content = f.read()

    base64_content = base64.b64encode(content).decode("utf-8")

    # Known leading-byte signatures, checked in order.
    signatures = (
        (b"%PDF", "application/pdf"),
        (b"\xff\xd8\xff", "image/jpeg"),
        (b"\x89PNG", "image/png"),
    )
    mime_type = next(
        (mime for magic, mime in signatures if content.startswith(magic)),
        "application/pdf",  # default
    )

    return base64_content, mime_type

def create_file_message(filepath: Path) -> HumanMessage:
    """Create a HumanMessage with file content.

    Args:
        filepath: Path of the document to attach.

    Returns:
        A ``HumanMessage`` with two content parts: a text instruction that
        names the file, and the file itself as an inline base64 block.
    """
    base64_content, mime_type = read_file_as_base64(filepath)

    # The file is sent as a "file" content block carrying base64 data and the
    # detected MIME type; the same block shape is used for PDFs and images.
    content = [
        {"type": "text", "text": f"Process this document and find the client: {filepath.name}"},
        {"type": "file", "source_type": "base64", "mime_type": mime_type, "data": base64_content},
    ]

    return HumanMessage(content=content)

# ============== MAIN ==============
async def run_agent(filepath: Path | None = None, text_input: str | None = None):
    """Run the agent with a file or, failing that, text input.

    The two inputs are alternatives: when ``filepath`` points to an existing
    file it is used and ``text_input`` is ignored.

    Args:
        filepath: Document to send to the model; skipped when ``None`` or
            when the file does not exist.
        text_input: Plain-text request used when no file is available.

    Returns:
        The final graph state returned by ``graph.ainvoke``, or ``None`` when
        neither input is usable.
    """
    graph = build_graph()

    # Build messages
    messages = [SystemMessage(content=SYSTEM_PROMPT)]

    if filepath and filepath.exists():
        print(f"Reading file: {filepath}")
        messages.append(create_file_message(filepath))
    elif text_input:
        messages.append(HumanMessage(content=text_input))
    else:
        print("No input provided!")
        return

    initial_state = {"messages": messages}

    # Run graph
    # NOTE(review): build_graph compiles without a checkpointer, so thread_id
    # presumably has no effect here — confirm. recursion_limit bounds the
    # process <-> tools loop.
    config = {"configurable": {"thread_id": "debug-1"}, "recursion_limit": 20}
    result = await graph.ainvoke(initial_state, config=config)
    return result

async def main():
    """Script entry point: run the agent on the test file, else on a text query."""
    # Try file first, fall back to text
    if TEST_FILE.exists():
        await run_agent(filepath=TEST_FILE)
    else:
        print(f"File not found: {TEST_FILE}")
        print("Using text input instead...\n")
        await run_agent(text_input="Find the id of the client Apple")


if __name__ == "__main__":
    asyncio.run(main())

Проблема, судя по всему, связана с наличием файла: при вызове в текстовом режиме всё работает. Я не могу приложить сам PDF-файл, но test.pdf содержит всего лишь следующее:
[img]https://canada1.discourse-cdn.com/flex007/uploads/langchain/optimized/2X/3/3dd723968b279a6984bb1be498c7c11ad4787d84_2_345x157.png[/img]

Агент правильно прочитал документ и определил Apple как целевую компанию. Тем не менее, он не может выполнить сопоставление с выходными данными инструмента. Моя среда (в Windows):
langchain==1.2.0
langchain-core==1.2.4
langchain-google-genai==4.1.2
langgraph==1.0.5
google-genai==1.56.0
 

Подробнее здесь: [url]https://stackoverflow.com/questions/79859269/langgraph-seems-to-ignore-tool-outputs-with-gemini-models[/url]