Observability Best Practices
Best practices for implementing effective tracing in AI applications, LLM systems, and RAG pipelines
Follow these best practices to implement effective tracing in your AI applications, ensuring you get maximum value from observability data.
🎯 Meaningful Span Names
Descriptive and Consistent Names
# ✅ Good span names
"llm-completion"
"document-retrieval"
"user-authentication"
"payment-processing"
# ❌ Poor span names
"function1"
"process"
"api_call"
"step"
Action-Oriented Naming
# Good: Action-oriented
"classify-query"
"generate-response"
"validate-input"
"retrieve-documents"
# Less preferred: noun/state-oriented (acceptable for stable component names when used consistently)
"query-classification"
"response-generation"
"input-validation"
"document-retrieval"
Hierarchical Naming for Complex Operations
# Use hierarchical naming for complex operations
"rag-pipeline"
"rag-pipeline.query-analysis"
"rag-pipeline.document-retrieval"
"rag-pipeline.answer-generation"
"multi-agent-workflow"
"multi-agent-workflow.task-planning"
"multi-agent-workflow.agent-researcher"
"multi-agent-workflow.result-synthesis"
📊 Rich Attributes
Include Context for Debugging and Analysis
# ✅ Rich attributes using context managers
with trace_llm(model="gpt-4", operation="llm-call") as span:
span.set_attributes({
"user.id": user_id,
"user.plan": "premium",
"llm.model": "gpt-4",
"llm.temperature": 0.7,
"prompt.category": "technical_question",
"response.confidence": 0.92
})
# ❌ Minimal attributes
span.set_attributes({"status": "ok"})
AI-Specific Attribute Categories
# LLM Attributes
span.set_attributes({
"llm.model": "gpt-4",
"llm.provider": "openai",
"llm.temperature": 0.7,
"llm.max_tokens": 1000,
"llm.top_p": 0.9
})
# Cost Attributes
span.set_attributes({
"llm.tokens.input": response.usage.prompt_tokens,
"llm.tokens.output": response.usage.completion_tokens,
"llm.cost.estimated": calculate_cost(response.usage)
})
# User Attributes
span.set_attributes({
"user.id": user_id,
"user.plan": "premium",
"user.location": "us-west",
"user.tier": "enterprise"
})
# Content Attributes
# `response` is the completion object (the cost attributes above read
# `response.usage` from it), so measure the generated text rather than
# calling len() on the object itself.
response_text = response.choices[0].message.content
span.set_attributes({
    "prompt.length": len(prompt),
    "response.length": len(response_text),
    "content.type": "technical_question",
    "content.language": "en"
})
# Quality Attributes
span.set_attributes({
"relevance.score": 0.85,
"confidence.level": 0.92,
"accuracy.rating": "high"
})
Performance Attributes
import time
import psutil
from noveum_trace.context_managers import trace_operation
def expensive_operation_with_performance_tracking():
    """Run ``expensive_operation`` in a span annotated with timing and memory data.

    On success the span records the elapsed time in milliseconds, the change
    in the process's resident set size (RSS) in megabytes, and a
    ``performance.cpu_intensive`` flag. On failure the span is flagged with
    ``performance.failed`` and the exception propagates unchanged.

    Returns:
        Whatever ``expensive_operation()`` returns.

    Raises:
        Exception: Any exception raised by ``expensive_operation()``.
    """
    with trace_operation("expensive-operation") as span:
        # Reuse one process handle for both memory samples.
        process = psutil.Process()
        start_memory = process.memory_info().rss / 1024 / 1024  # MB
        # perf_counter() is meant for measuring intervals; unlike time.time()
        # it is not affected by system clock adjustments.
        start_time = time.perf_counter()

        # Keep the try body to the one call whose failure we want to flag.
        try:
            result = expensive_operation()
        except Exception:
            span.set_attributes({'performance.failed': True})
            raise

        elapsed_ms = (time.perf_counter() - start_time) * 1000
        end_memory = process.memory_info().rss / 1024 / 1024  # MB
        span.set_attributes({
            'performance.duration_ms': elapsed_ms,
            'performance.memory_delta_mb': end_memory - start_memory,
            'performance.cpu_intensive': True
        })
        return result
🛡️ Error Handling
Comprehensive Error Capture
from noveum_trace.context_managers import trace_operation
from datetime import datetime
from datetime import timezone

# Run the operation inside a span and record either its outcome quality or
# the full error context before re-raising.
with trace_operation("expensive-ai-operation") as span:
    try:
        result = expensive_ai_operation()
        span.set_attributes({
            "operation.success": True,
            "operation.result_quality": assess_quality(result)
        })
    except Exception as e:
        span.set_attributes({
            "operation.success": False,
            "error.type": type(e).__name__,
            "error.message": str(e)
        })
        span.add_event("operation.failed", {
            # Timezone-aware UTC timestamp: a naive local time is ambiguous
            # once traces from hosts in different time zones are compared.
            "error.timestamp": datetime.now(timezone.utc).isoformat(),
            "error.recoverable": is_recoverable_error(e)
        })
        # Re-raise so callers still see the original failure.
        raise
Error Context and Recovery
import time

# Call the API with up to `max_retries` retries, recording each attempt,
# each failure and the final outcome as span events.
with trace_operation("api-call") as span:
    retry_count = 0
    max_retries = 3
    # One initial attempt plus up to `max_retries` retries.
    while retry_count <= max_retries:
        attempt = retry_count + 1
        span.add_event("api.call.attempted", {
            "attempt": attempt,
            "max_retries": max_retries
        })
        # Time each attempt separately so the success event reports the
        # latency of the call that actually succeeded.
        attempt_start = time.perf_counter()
        try:
            result = make_api_call()
        except Exception as e:
            retry_count += 1
            will_retry = retry_count <= max_retries
            span.add_event("api.call.failed", {
                "attempt": attempt,
                "error.type": type(e).__name__,
                "error.message": str(e),
                "will_retry": will_retry
            })
            if not will_retry:
                span.set_status("error", f"Max retries exceeded: {str(e)}")
                raise
        else:
            # Recorded outside the try so a failure while emitting the event
            # is not mistaken for an API failure and retried.
            span.add_event("api.call.succeeded", {
                "attempt": attempt,
                # perf_counter() returns seconds; convert to milliseconds.
                "duration_ms": (time.perf_counter() - attempt_start) * 1000
            })
            break
🧠 AI-Specific Tracing Patterns
RAG Pipeline Tracing
from noveum_trace.context_managers import trace_operation, trace_llm
def rag_query(question: str) -> str:
    """Answer ``question`` with a traced three-phase RAG pipeline.

    The phases (query analysis, document retrieval and answer generation)
    each run in their own span nested under a ``rag-pipeline`` span.

    Args:
        question: The user's question.

    Returns:
        The answer produced by ``generate_answer_with_context``.
    """
    import time  # local import keeps this example self-contained

    with trace_operation("rag-pipeline"):
        # Phase 1: Query understanding
        with trace_operation("query-analysis") as step:
            intent = analyze_query_intent(question)
            step.set_attributes({
                "query.intent": intent,
                "query.complexity": get_complexity_score(question),
                "query.length": len(question)
            })

        # Phase 2: Retrieval
        with trace_operation("document-retrieval") as step:
            # Time embedding and search separately so a slow retrieval can be
            # attributed to the right stage. Both values are recorded in
            # milliseconds, matching the `*_ms` attributes used elsewhere.
            embedding_start = time.perf_counter()
            embeddings = generate_embeddings(question)
            embedding_time = (time.perf_counter() - embedding_start) * 1000

            search_start = time.perf_counter()
            documents = vector_search(embeddings, k=5)
            search_time = (time.perf_counter() - search_start) * 1000

            step.set_attributes({
                "retrieval.query_embedding_time": embedding_time,
                "retrieval.search_time": search_time,
                "retrieval.documents_found": len(documents),
                "retrieval.avg_similarity": avg_similarity(documents)
            })

        # Phase 3: Generation
        with trace_llm(model="gpt-4", operation="answer-generation") as step:
            context = build_context(documents)
            answer = generate_answer_with_context(question, context)
            step.set_attributes({
                "generation.context_length": len(context),
                "generation.answer_length": len(answer),
                "generation.model": "gpt-4"
            })

        return answer
Multi-Agent Tracing
from noveum_trace.context_managers import trace_operation, trace_llm
def multi_agent_task(task: str):
    """Plan ``task``, run every planned step in its own agent span, then synthesize.

    All work happens under a ``multi-agent-task`` span. Planning runs in a
    ``task-planning`` span, each plan step in an ``agent-<name>`` span, and
    the final combination in a ``result-synthesis`` span.

    Args:
        task: Description of the work to hand to the planning agent.

    Returns:
        Whatever ``synthesize_results`` returns for the collected step results.
    """
    with trace_operation("multi-agent-task"):
        # Planning: describe the task on the span, then build the plan.
        with trace_operation("task-planning") as plan_span:
            planning_attributes = {
                "task.type": classify_task(task),
                "agents.required": ['researcher', 'writer', 'reviewer']
            }
            plan_span.set_attributes(planning_attributes)
            plan = planning_agent.create_plan(task)

        # Execution: one span per plan step, annotated before and after the run.
        outcomes = []
        for plan_step in plan.steps:
            span_name = f"agent-{plan_step.agent}"
            with trace_operation(span_name) as step_span:
                step_span.set_attributes({
                    "agent.name": plan_step.agent,
                    "agent.task": plan_step.task,
                    "agent.tools": plan_step.tools
                })
                outcome = execute_agent_step(plan_step)
                step_span.set_attributes({
                    "agent.success": outcome.success,
                    "agent.confidence": outcome.confidence
                })
                outcomes.append(outcome)

        # Synthesis: combine every step result while the synthesis span is open.
        with trace_operation("result-synthesis"):
            return synthesize_results(outcomes)
LLM Call Tracing
from noveum_trace.context_managers import trace_llm
import openai
def call_llm(model: str, prompt: str, user_id: str):
    """Send ``prompt`` as a single user message and return the first choice's text.

    The call runs inside an ``llm-call`` span. Request metadata is attached
    before the call; token counts and an estimated cost are attached after.

    Args:
        model: Model name passed to both the span and the completion request.
        prompt: Text sent as the content of the user message.
        user_id: Identifier recorded on the span as ``user.id``.

    Returns:
        ``message.content`` of the first choice in the completion response.
    """
    with trace_llm(model=model, operation="llm-call") as llm_span:
        # Request-side context, recorded before the provider is called.
        request_attributes = {
            "llm.model": model,
            "llm.provider": "openai",
            "user.id": user_id,
            "prompt.length": len(prompt),
            "prompt.type": "user_query"
        }
        llm_span.set_attributes(request_attributes)

        user_message = {"role": "user", "content": prompt}
        completion = openai.chat.completions.create(
            model=model,
            messages=[user_message]
        )

        # Response-side accounting taken from the reported usage.
        usage = completion.usage
        llm_span.set_attributes({
            "llm.tokens.input": usage.prompt_tokens,
            "llm.tokens.output": usage.completion_tokens,
            "llm.cost.estimated": calculate_cost(usage)
        })

        first_choice = completion.choices[0]
        return first_choice.message.content
🎪 Event Patterns
Start/Complete Pattern
from noveum_trace.context_managers import trace_operation
from datetime import datetime
def process_document(doc_id: str):
    """Chunk and embed a document, emitting start, progress and completion events.

    Events are added to a ``document-processing`` span:
    ``processing.started``, ``document.chunked``, ``embeddings.generated``
    and ``processing.completed`` on success, or ``processing.failed`` when
    any step raises.

    Args:
        doc_id: Identifier of the document to process.

    Returns:
        The embeddings produced by ``generate_embeddings`` for the chunks.

    Raises:
        Exception: Any exception raised while processing; it is recorded on
            the span and re-raised unchanged.
    """
    from datetime import timezone  # local import keeps this example self-contained

    with trace_operation("document-processing") as span:
        span.add_event("processing.started", {
            "document.id": doc_id,
            # UTC-aware so timestamps from different hosts are comparable.
            "timestamp": datetime.now(timezone.utc).isoformat()
        })
        try:
            # Processing logic
            chunks = split_document(doc_id)
            chunk_count = len(chunks)
            total_size = sum(len(c) for c in chunks)
            span.add_event("document.chunked", {
                "chunks.count": chunk_count,
                # A document that yields no chunks must not surface as a
                # ZeroDivisionError "processing.failed" event.
                "chunks.avg_size": total_size / chunk_count if chunk_count else 0
            })

            embeddings = generate_embeddings(chunks)
            span.add_event("embeddings.generated", {
                "embeddings.count": len(embeddings),
                "embeddings.model": "text-embedding-ada-002"
            })

            # Close the start/complete pair so a successful run is explicit
            # in the event stream rather than implied by the lack of a failure.
            span.add_event("processing.completed", {
                "document.id": doc_id,
                "timestamp": datetime.now(timezone.utc).isoformat()
            })
            return embeddings
        except Exception as e:
            span.add_event("processing.failed", {
                "error.type": type(e).__name__,
                "error.message": str(e)
            })
            raise
State Change Events
# Record each model-selection state change as an event, tracking the active
# model in one variable so the final event always reports the model in use.
with trace_operation("ai-completion") as span:
    # Initial state
    model = "gpt-3.5-turbo"
    span.add_event("ai.initialization", {
        "model": model,
        "temperature": 0.7
    })

    # State change: only complex queries are moved to the larger model.
    if query_complexity > 0.8:
        upgraded_model = "gpt-4"
        span.add_event("ai.model.upgraded", {
            "from.model": model,
            "to.model": upgraded_model,
            "reason": "high_complexity"
        })
        model = upgraded_model

    # Final state: report the active model, which is still the initial one
    # when no upgrade took place.
    span.add_event("ai.completion.ready", {
        "final.model": model,
        "tokens.estimated": 200
    })
🔍 Debugging Strategies
Common Debugging Scenarios
1. Slow Response Times
Look for spans with high duration:
- Is the LLM call taking too long?
- Is document retrieval the bottleneck?
- Are there unnecessary sequential operations?
2. High Costs
Analyze cost-related attributes:
- Which models are being used?
- How many tokens are being consumed?
- Are there redundant API calls?
3. Quality Issues
Examine quality attributes:
- What's the confidence score of responses?
- How relevant are retrieved documents?
- Are there patterns in failed operations?
4. Error Patterns
Filter by error events and attributes:
- What types of errors are most common?
- Do errors correlate with specific users/inputs?
- Are errors happening at specific times?
Performance Monitoring
# Monitor key performance metrics
span.set_attributes({
"performance.duration_ms": duration_ms,
"performance.memory_usage_mb": memory_usage,
"performance.cpu_usage_percent": cpu_usage,
"performance.cache_hit_rate": cache_hit_rate
})
Cost Monitoring
# Track AI costs
span.set_attributes({
"cost.tokens_input": input_tokens,
"cost.tokens_output": output_tokens,
"cost.usd_estimated": estimated_cost,
"cost.model": model_name
})
🎯 Context Management
Consistent Context Propagation
# Attach request-wide context to the top-level span of the trace.
with trace_operation("customer-query") as main_span:
    request_context = {
        "customer.id": customer_id,
        "query.type": "support",
        "session.id": session_id
    }
    main_span.set_attributes(request_context)

    # The child span is opened while the parent is still active.
    # NOTE(review): this guide states that child spans automatically carry
    # the parent's customer.id and query.type. Confirm that against the
    # noveum_trace SDK before relying on it.
    with trace_operation("classify-query") as child_span:
        classification = classify_query(query)
Business Context
# Include business-relevant context
span.set_attributes({
"business.operation": "customer_support",
"business.priority": "high",
"business.customer_tier": "premium",
"business.region": "us-west",
"business.feature": "chatbot"
})
🚀 Next Steps
Now that you understand tracing best practices, explore these related guides:
- Traces Best Practices - Best practices for complete request journeys
- Spans Best Practices - Best practices for individual operations
- Attributes Best Practices - Best practices for metadata and context
- Events Best Practices - Best practices for point-in-time occurrences
Remember: Good observability is not about collecting all possible data, but about collecting the right data that helps you understand, debug, and optimize your AI applications.
Get Early Access to Noveum.ai Platform
Be the first to get notified when we open the Noveum Platform to more users. All users get access to the Observability suite for free; early users also get free eval jobs and premium support for the first year.