Documentation
Best Practices/Spans Best Practices

Spans Best Practices

Best practices for creating effective spans in your AI applications

Follow these best practices to create meaningful, well-structured spans that provide clear insights into your operations.

🎯 Span Naming

Clear and Descriptive Names

# Good: Clear and descriptive
trace_operation("gpt-4-completion")
trace_operation("vector-search")
trace_operation("customer-data-processing")
 
# Bad: Generic or unclear
trace_operation("process")
trace_operation("call")
trace_operation("function")

Use Action-Oriented Names

# Good: Action-oriented
trace_operation("classify-query")
trace_operation("generate-response")
trace_operation("validate-input")
 
# Bad: Noun-oriented (names the thing, not the action)
trace_operation("query-classification")
trace_operation("response-generation")
trace_operation("input-validation")

📊 Attribute Naming

Consistent Naming Conventions

# Use consistent naming conventions
span.set_attributes({
    "ai.model": "gpt-4",           # ai.* for AI-specific attributes
    "ai.provider": "openai",
    "business.customer_id": "123", # business.* for business attributes
    "system.duration_ms": 1800     # system.* for system attributes
})

Hierarchical Naming

# Use dot notation for logical hierarchies
span.set_attributes({
    "ai.model": "gpt-4",
    "ai.provider": "openai",
    "ai.temperature": 0.7,
    "ai.max_tokens": 1000,
    
    "customer.id": "cust_123",
    "customer.tier": "premium",
    "customer.region": "us-west",
    
    "query.type": "technical_support",
    "query.priority": "high",
    "query.language": "en"
})

🎪 Event Timing

Add Events at Meaningful Points

# Add events at meaningful points
span.add_event("operation.started", {"timestamp": time.time()})
 
# Do the work
result = perform_operation()
 
span.add_event("operation.completed", {
    "timestamp": time.time(),
    "result.size": len(result)
})

State Change Events

with trace_operation("ai-completion") as span:
    # Initial state
    span.add_event("ai.initialization", {
        "model": "gpt-3.5-turbo",
        "temperature": 0.7
    })
    
    # State change
    if query_complexity > 0.8:
        span.add_event("ai.model.upgraded", {
            "from.model": "gpt-3.5-turbo",
            "to.model": "gpt-4",
            "reason": "high_complexity"
        })
    
    # Final state
    span.add_event("ai.completion.ready", {
        "final.model": "gpt-4",
        "tokens.estimated": 200
    })

🛡️ Error Handling

Comprehensive Error Tracking

with trace_operation("risky-operation") as span:
    try:
        result = risky_operation()
        span.set_status("success")
        return result
    except Exception as e:
        span.set_status("error", str(e))
        span.add_event("error.occurred", {
            "error.type": type(e).__name__,
            "error.message": str(e)
        })
        raise

Error Context and Recovery

with trace_operation("api-call") as span:
    retry_count = 0
    max_retries = 3
    
    while retry_count <= max_retries:
        try:
            result = make_api_call()
            span.set_status("success")
            return result
        except Exception as e:
            retry_count += 1
            
            span.add_event("api.call.failed", {
                "attempt": retry_count,
                "error.type": type(e).__name__,
                "error.message": str(e),
                "will_retry": retry_count <= max_retries
            })
            
            if retry_count > max_retries:
                span.set_status("error", f"Max retries exceeded: {str(e)}")
                raise

🔗 Parent-Child Relationships

Logical Hierarchy

with trace_operation("parent-operation") as parent_span:
    # Child span 1
    with trace_operation("child-operation-1") as child1_span:
        result1 = operation_1()
    
    # Child span 2
    with trace_operation("child-operation-2") as child2_span:
        result2 = operation_2()
    
    # Parent span can access child results
    parent_span.set_attributes({
        "child1.result": result1,
        "child2.result": result2
    })

Context Inheritance

# Spans automatically inherit context from parents
with trace_operation("customer-query") as parent_span:
    parent_span.set_attributes({
        "customer.id": "cust_123",
        "query.type": "support"
    })
    
    # Child spans inherit customer context
    with trace_operation("classify-query") as child_span:
        # This span automatically has customer.id and query.type
        classification = classify_query(query)

📈 Performance Optimization

Minimize Attribute Overhead

# Good: Essential attributes only
span.set_attributes({
    "customer.id": customer_id,
    "query.type": query_type,
    "ai.model": model_name
})
 
# Bad: Too many attributes
span.set_attributes({
    "customer.id": customer_id,
    "customer.name": customer_name,
    "customer.email": customer_email,
    "customer.phone": customer_phone,
    "customer.address": customer_address,
    # ... 50 more attributes
})

Use Conditional Attributes

# Only add attributes when relevant
if customer_tier == "premium":
    span.set_attribute("customer.priority", "high")
    span.set_attribute("ai.model", "gpt-4")
else:
    span.set_attribute("customer.priority", "normal")
    span.set_attribute("ai.model", "gpt-3.5-turbo")

🎯 AI-Specific Best Practices

LLM Span Attributes

with trace_llm(model="gpt-4", provider="openai") as span:
    response = openai.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello"}]
    )
    
    # Set usage attributes
    span.set_usage_attributes(
        input_tokens=response.usage.prompt_tokens,
        output_tokens=response.usage.completion_tokens
    )
    
    # Add model-specific attributes
    span.set_attributes({
        "ai.temperature": 0.7,
        "ai.max_tokens": 1000,
        "ai.finish_reason": response.choices[0].finish_reason
    })

Agent Span Context

with trace_agent(agent_type="researcher", agent_id="researcher_001") as span:
    span.set_attributes({
        "agent.capabilities": "web_search,analysis",
        "agent.task": "research_topic",
        "agent.input": topic,
        "agent.context": "customer_support"
    })
    
    result = research_agent.analyze(topic)
    
    span.set_attributes({
        "agent.output": result,
        "agent.confidence": result.confidence,
        "agent.sources_count": len(result.sources)
    })

Tool Execution Spans

with trace_tool(tool_name="web_search", tool_type="api") as span:
    span.set_attributes({
        "tool.input.query": query,
        "tool.input.max_results": 10,
        "tool.input.region": "us-west"
    })
    
    results = web_search_tool.search(query)
    
    span.set_attributes({
        "tool.output.results_count": len(results),
        "tool.output.success": True,
        "tool.output.quality_score": results.quality_score
    })

🔍 Debugging Support

Include Debug Information

span.set_attributes({
    "debug.span_id": span.span_id,
    "debug.trace_id": span.trace_id,
    "debug.timestamp": time.time(),
    "debug.version": "1.2.3"
})

Trace Correlation

# Use consistent correlation IDs
correlation_id = generate_correlation_id()
span.set_attribute("correlation.id", correlation_id)
 
# Pass correlation ID to external services
external_service_call(correlation_id=correlation_id)

🎪 Event Patterns

Start/Complete Pattern

with trace_operation("process-query") as span:
    # Record the start time so the duration can be reported below
    start_time = time.time()
    
    # Start event
    span.add_event("operation.started", {
        "timestamp": time.time(),
        "input.size": len(query),
        "input.type": "text"
    })
    
    try:
        # Process the query
        result = process_query(query)
        
        # Complete event
        span.add_event("operation.completed", {
            "timestamp": time.time(),
            "output.size": len(result),
            "success": True,
            "duration_ms": (time.time() - start_time) * 1000
        })
        
    except Exception as e:
        # Error event
        span.add_event("operation.failed", {
            "timestamp": time.time(),
            "error.type": type(e).__name__,
            "error.message": str(e),
            "duration_ms": (time.time() - start_time) * 1000
        })
        raise

Retry Pattern

with trace_operation("api-call") as span:
    retry_count = 0
    max_retries = 3
    # Record the start time so the total duration across attempts can be reported
    start_time = time.time()
    
    while retry_count <= max_retries:
        try:
            span.add_event("api.call.attempted", {
                "timestamp": time.time(),
                "attempt": retry_count + 1,
                "max_retries": max_retries
            })
            
            result = make_api_call()
            
            span.add_event("api.call.succeeded", {
                "timestamp": time.time(),
                "attempt": retry_count + 1,
                "duration_ms": (time.time() - start_time) * 1000
            })
            
            break
            
        except Exception as e:
            retry_count += 1
            
            span.add_event("api.call.failed", {
                "timestamp": time.time(),
                "attempt": retry_count,
                "error.type": type(e).__name__,
                "error.message": str(e),
                "will_retry": retry_count <= max_retries
            })
            
            if retry_count > max_retries:
                raise

🚀 Next Steps

Now that you understand span best practices, explore these related concepts:


Well-structured spans are the building blocks of observability. By following these best practices, you'll create spans that provide clear insights into your operations.

Exclusive Early Access

Get Early Access to Noveum.ai Platform

Be among the first to be notified when we open the Noveum Platform to more users. All users get access to the Observability suite for free; early users also get free eval jobs and premium support for the first year.

Sign up now. We grant access to a new batch of users every week.

Early access members receive premium onboarding support and influence our product roadmap. Limited spots available.