Код: Выделить всё
from pydantic_ai import Agent, RunContext
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic_ai.providers.ollama import OllamaProvider
ollama_model = OpenAIChatModel(
model_name="llama3.1:latest",
provider=OllamaProvider(base_url="http://localhost:11434/v1"),
)
roulette_agent = Agent(
ollama_model,
deps_type=int,
output_type=bool,
system_prompt=(
"Use the `roulette_wheel` function to see if the "
"customer has won based on the number they provide."
),
)
@roulette_agent.tool
async def roulette_wheel(ctx: RunContext[int], square: int) -> str:
"""check if the square is a winner"""
return "winner" if square == ctx.deps else "loser"
if __name__ == "__main__":
# Run the agent
success_number = 18
result = roulette_agent.run_sync(
"Put my money on square eighteen", deps=success_number
)
print(result.output)
# > True
Код: Выделить всё
from roulette_wheel import roulette_agent
from pydantic_evals import Case, Dataset
from pydantic_evals.evaluators import EqualsExpected
# Create a dataset with test cases
success_number = 18
dataset = Dataset(
cases=[
Case(
name="no_win",
inputs={"query": "Put my money on square eighteen", "deps": success_number},
expected_output=False,
),
],
evaluators=[
EqualsExpected(), # Check exact match with expected_output
],
)
# Run the evaluation
report = dataset.evaluate_sync(roulette_agent.run_sync)
# Print the results
report.print()
Код: Выделить всё
Evaluating run_sync ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:00:00
Evaluation Summary:
run_sync
┏━━━━━━━━━┳━━━━━━━━━━┓
┃ Case ID ┃ Duration ┃
┡━━━━━━━━━╇━━━━━━━━━━┩
└─────────┴──────────┘
Case Failures
┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Case ID ┃ Error Message ┃
┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ no_win │ UnexpectedModelBehavior: Exceeded maximum retries (1) for output validation │
└─────────┴─────────────────────────────────────────────────────────────────────────────┘
Подробнее здесь: https://stackoverflow.com/questions/798 ... st-prompts
Мобильная версия