API Reference › Client Methods
Agent Evaluation
Methods for retrieving agent success evaluation results.
Agent Evaluation
get_agent_success_evaluation_results
Get agent success evaluation results to analyze performance.
response = client.get_agent_success_evaluation_results(
agent_version=None,
limit=100
)

Prop | Type
agent_version | str, optional (default: None)
limit | int (default: 100)
Example:
# Example: analyze agent success evaluation results.
# Assumes `client` is an already-constructed API client instance.
from collections import Counter

# Get all results
response = client.get_agent_success_evaluation_results(limit=100)
results = response.agent_success_evaluation_results

# Calculate success rate
total = len(results)
successful = sum(1 for r in results if r.is_success)
if total:
    print(f"Success rate: {successful/total*100:.1f}%")
else:
    # Avoid ZeroDivisionError when the query returns no evaluations.
    print("Success rate: n/a (no evaluation results)")

# Review failures
failures = [r for r in results if not r.is_success]
print("\nFailure Analysis:")
for result in failures:
    print(f"Session: {result.session_id}")
    print(f" Failure Type: {result.failure_type}")
    print(f" Reason: {result.failure_reason}")

# Compare agent versions
v1_results = client.get_agent_success_evaluation_results(
    agent_version="v1.0.0",
    limit=100,
)
v2_results = client.get_agent_success_evaluation_results(
    agent_version="v2.0.0",
    limit=100,
)
v1_success = sum(1 for r in v1_results.agent_success_evaluation_results if r.is_success)
v2_success = sum(1 for r in v2_results.agent_success_evaluation_results if r.is_success)
print("\nVersion Comparison:")
print(f"v1.0.0: {v1_success}/{len(v1_results.agent_success_evaluation_results)} successful")
print(f"v2.0.0: {v2_success}/{len(v2_results.agent_success_evaluation_results)} successful")

# Group failures by type
failure_types = Counter(r.failure_type for r in failures)
print("\nFailure Types:")
for failure_type, count in failure_types.most_common():
    print(f" {failure_type}: {count}")