Documentation
Best Practices/Traces Best Practices

Traces Best Practices

Best practices for creating effective traces in your AI applications

Follow these best practices to create effective, meaningful traces that provide valuable insights into your AI applications.

🎯 Trace Naming

Descriptive and Consistent Names

# Good: Descriptive and consistent
trace_operation("customer-support-query")
trace_operation("rag-pipeline")
trace_operation("multi-agent-workflow")
 
# Bad: Generic or unclear
trace_operation("process")
trace_operation("main")
trace_operation("function")

Use Action-Oriented Names

# Good: Action-oriented
trace_operation("process-customer-query")
trace_operation("generate-ai-response")
trace_operation("validate-user-input")
 
# Bad: State-oriented
trace_operation("customer-query")
trace_operation("ai-response")
trace_operation("user-input")

📊 Attribute Organization

# Group related attributes logically
span.set_attributes({
    # Customer context
    "customer.id": customer_id,
    "customer.tier": customer_tier,
    "customer.region": customer_region,
    
    # Query context
    "query.type": query_type,
    "query.length": len(query),
    "query.language": query_language,
    
    # AI context
    "ai.model": model_name,
    "ai.provider": provider,
    "ai.temperature": temperature
})

Use Consistent Naming Conventions

# Use hierarchical naming with dots
span.set_attributes({
    "business.customer_id": "cust_123",
    "business.operation": "support_query",
    "business.priority": "high",
    
    "ai.model": "gpt-4",
    "ai.provider": "openai",
    "ai.temperature": 0.7,
    
    "system.duration_ms": 1800,
    "system.status": "success"
})

🛡️ Error Handling

Comprehensive Error Tracking

# Record both the success and the failure outcome on the span.
# NOTE: the `return` below means this snippet must live inside a function,
# and `traceback` must be imported by the surrounding module.
with trace_operation("risky-operation") as span:
    try:
        result = risky_operation()
        span.set_status("success")
        return result
    except Exception as e:
        # Mark the span as failed and attach the error details as an event
        span.set_status("error", str(e))
        span.add_event("error.occurred", {
            "error.type": type(e).__name__,
            "error.message": str(e),
            "error.stack": traceback.format_exc()
        })
        # Re-raise so the caller still sees the original exception
        raise

Error Context and Recovery

with trace_operation("api-call") as span:
    retry_count = 0
    max_retries = 3
    
    while retry_count <= max_retries:
        try:
            result = make_api_call()
            span.set_status("success")
            return result
        except Exception as e:
            retry_count += 1
            span.add_event("error.retry", {
                "error.type": type(e).__name__,
                "retry.attempt": retry_count,
                "retry.max_attempts": max_retries,
                "retry.will_retry": retry_count <= max_retries
            })
            
            if retry_count > max_retries:
                span.set_status("error", f"Max retries exceeded: {str(e)}")
                raise

🎪 Event Timing

Meaningful Event Placement

with trace_operation("process-query") as span:
    # Capture a monotonic start point so the duration reported below is
    # defined and is not affected by wall-clock adjustments.
    start_time = time.perf_counter()

    # Start event with context
    span.add_event("operation.started", {
        "timestamp": time.time(),
        "input.size": len(query),
        "input.type": "text"
    })

    # Process the query
    result = process_query(query)

    # Completion event with results
    span.add_event("operation.completed", {
        "timestamp": time.time(),
        "output.size": len(result),
        "success": True,
        # perf_counter() is in seconds; convert to match the "_ms" suffix
        "duration_ms": (time.perf_counter() - start_time) * 1000
    })

State Change Events

with trace_operation("ai-completion") as span:
    # Track the active model so every event reports what was actually selected
    model = "gpt-3.5-turbo"

    # Initial state
    span.add_event("ai.initialization", {
        "model": model,
        "temperature": 0.7
    })

    # State change
    if query_complexity > 0.8:
        upgraded_model = "gpt-4"
        span.add_event("ai.model.upgraded", {
            "from.model": model,
            "to.model": upgraded_model,
            "reason": "high_complexity"
        })
        model = upgraded_model

    # Final state: reflects the upgrade only if it actually happened
    span.add_event("ai.completion.ready", {
        "final.model": model,
        "tokens.estimated": 200
    })

🔗 Span Hierarchy

Logical Parent-Child Relationships

with trace_operation("customer-support-query") as parent_span:
    # Set context at parent level
    parent_span.set_attributes({
        "customer.id": customer_id,
        "query.type": "support"
    })
    
    # Child spans are opened while the parent span is active
    # (presumably this links them to the parent — confirm against the SDK)
    with trace_operation("classify-query") as child_span:
        classification = classify_query(query)
    
    # The second step consumes the result of the first
    with trace_operation("generate-response") as child_span:
        response = generate_response(query, classification)
    
    # Parent can aggregate child results
    parent_span.set_attributes({
        "classification.result": classification,
        "response.length": len(response)
    })

Avoid Deep Nesting

# Good: Reasonable nesting depth
with trace_operation("main-operation") as span:
    with trace_operation("sub-operation-1") as sub_span:
        result1 = operation_1()
    
    with trace_operation("sub-operation-2") as sub_span:
        result2 = operation_2()
 
# Bad: Too deep nesting
with trace_operation("level1") as span1:
    with trace_operation("level2") as span2:
        with trace_operation("level3") as span3:
            with trace_operation("level4") as span4:
                with trace_operation("level5") as span5:
                    result = operation()

📈 Performance Considerations

Minimize Overhead

# Good: Essential attributes only
span.set_attributes({
    "customer.id": customer_id,
    "query.type": query_type,
    "ai.model": model_name
})
 
# Bad: Too many attributes
# NOTE(review): name, email, phone and address look like personal data —
# confirm they are allowed to be exported to the tracing backend at all.
span.set_attributes({
    "customer.id": customer_id,
    "customer.name": customer_name,
    "customer.email": customer_email,
    "customer.phone": customer_phone,
    "customer.address": customer_address,
    # ... 50 more attributes
})

Use Conditional Attributes

# Choose attribute values based on the customer tier.
# NOTE: both branches set the same two keys; only the values differ.
if customer_tier == "premium":
    span.set_attribute("customer.priority", "high")
    span.set_attribute("ai.model", "gpt-4")
else:
    span.set_attribute("customer.priority", "normal")
    span.set_attribute("ai.model", "gpt-3.5-turbo")

🎯 Business Context

Include Business Metrics

span.set_attributes({
    "business.operation": "customer_support",
    "business.priority": "high",
    "business.customer_tier": "premium",
    "business.region": "us-west",
    "business.feature": "chatbot",
    "business.cost_center": "support_team"
})

Track Business Outcomes

span.add_event("business.outcome", {
    "customer.satisfaction": 4.5,
    "resolution.time_minutes": 15,
    "escalation.required": False,
    "follow_up.needed": True
})

🔍 Debugging Support

Include Debug Information

span.set_attributes({
    "debug.query_id": query_id,
    "debug.session_id": session_id,
    "debug.user_agent": request.headers.get("user-agent"),
    "debug.timestamp": time.time()
})

Trace Correlation

# Use consistent trace IDs across services
trace_id = generate_trace_id()
span.set_attribute("trace.correlation_id", trace_id)
 
# Pass trace ID to external services
external_service_call(trace_id=trace_id)

🚀 Next Steps

Now that you understand trace best practices, explore these related concepts:


Effective traces are the foundation of observability. By following these best practices, you'll create traces that provide valuable insights into your AI applications.

Exclusive Early Access

Get Early Access to Noveum.ai Platform

Be among the first to be notified when we open the Noveum Platform to more users. All users get access to the Observability suite for free; early users also get free eval jobs and premium support for the first year.

Sign up now. We grant access to a new batch of users every week.

Early access members receive premium onboarding support and influence our product roadmap. Limited spots available.