SDK Integration Guide
Integrate Noveum.ai tracing into your AI applications with flexible Python approaches
The Noveum.ai Python SDK provides comprehensive tracing and observability for your AI applications with minimal code changes. Whether you're building LLM applications, RAG systems, or multi-agent workflows, our flexible tracing approaches automatically capture essential metrics and traces.
🚀 Quick Start
1. Create Your Account & Get API Key
- Sign up at noveum.ai
- Generate an API key from the integration page
- Get your API key ready for the next step
2. Install the SDK
pip install noveum-trace

Requirements: Python 3.8+
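To verify the installation, a quick sanity check (the __version__ attribute is an assumption here; most published packages expose one):

import noveum_trace  # raises ImportError if the install failed

print(noveum_trace.__version__)  # assumed attribute, shown for illustration only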
3. Set Environment Variables
Environment Variables:
export NOVEUM_API_KEY="your-api-key"
export NOVEUM_PROJECT="my-ai-app"
export NOVEUM_ENVIRONMENT="development"
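Alternatively, you can keep these values in a .env file and load them with python-dotenv, as the LangChain examples later in this guide do. The file mirrors the exports above:

# .env
NOVEUM_API_KEY=your-api-key
NOVEUM_PROJECT=my-ai-app
NOVEUM_ENVIRONMENT=development

Then load it before initializing the SDK:

from dotenv import load_dotenv

load_dotenv()  # makes the .env values visible to os.getenv()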
Initialization
import os

import noveum_trace

noveum_trace.init(
    api_key=os.getenv("NOVEUM_API_KEY"),
    project=os.getenv("NOVEUM_PROJECT"),
    environment=os.getenv("NOVEUM_ENVIRONMENT"),
)

When you initialize with noveum_trace.init(), the following happens automatically (a minimal end-to-end sketch follows the list):
- Project Creation: The project is created automatically in the Noveum UI, named after the project string you provide
- Environment Organization: Environments are used to organize traces (e.g., dev, prod, beta, staging)
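Putting the pieces together: if your application is a short-lived script or batch job, flush pending traces before the process exits. A minimal end-to-end sketch, assuming the environment variables above are set (the fallback defaults passed to os.getenv are illustrative only):

import os

import noveum_trace

noveum_trace.init(
    api_key=os.getenv("NOVEUM_API_KEY"),
    project=os.getenv("NOVEUM_PROJECT", "my-ai-app"),  # illustrative fallback
    environment=os.getenv("NOVEUM_ENVIRONMENT", "development"),  # illustrative fallback
)

# ... run your traced application code here ...

noveum_trace.flush()  # flush any pending traces before exit (also used in the LangChain example below)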
🎯 Flexible Tracing Approaches
Approach 1: Context Managers (Recommended)
Context managers provide the most flexible way to trace specific parts of your code without requiring decorators on every function.
import os
import time

from openai import OpenAI

import noveum_trace
from noveum_trace.context_managers import trace_llm, trace_operation

# Initialize Noveum Trace SDK
noveum_trace.init(
    api_key=os.getenv("NOVEUM_API_KEY"),
    project=os.getenv("NOVEUM_PROJECT"),
    environment=os.getenv("NOVEUM_ENVIRONMENT"),
)

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def process_user_query(user_query: str) -> str:
    """Process a user query with granular tracing."""
    # Step 1: Enhance the query with an LLM (traced)
    cleaned_query = user_query.strip().lower()
    with trace_llm(model="gpt-3.5-turbo", operation="query_enhancement"):
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a query enhancement assistant."},
                {"role": "user", "content": f"Enhance this search query: {cleaned_query}"},
            ],
        )
        enhanced_query = response.choices[0].message.content

    # Step 2: Simulate a database lookup (traced as an operation)
    with trace_operation("database_lookup"):
        time.sleep(0.5)  # simulate database query latency
        search_results = [
            {"id": 1, "title": "Result 1", "relevance": 0.95},
            {"id": 2, "title": "Result 2", "relevance": 0.85},
        ]

    # Step 3: Generate the final response with an LLM (traced)
    with trace_llm(model="gpt-4", operation="response_generation"):
        context = str(search_results[:2])
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": f"Use this context to answer: {context}"},
                {"role": "user", "content": enhanced_query},  # use the enhanced query from step 1
            ],
        )
        final_response = response.choices[0].message.content

    return final_response

# Usage
result = process_user_query("What is the capital of France?")
# ✅ Automatically tracked: latency, cost, tokens, model, etc.

Approach 2: Manual Span Creation
For legacy code or when you need fine-grained control, you can manually create and manage spans.
import os
import time

import noveum_trace
from noveum_trace import get_client
from noveum_trace.context_managers import trace_operation

# Initialize Noveum Trace
noveum_trace.init(
    api_key=os.getenv("NOVEUM_API_KEY"),
    project=os.getenv("NOVEUM_PROJECT"),
    environment=os.getenv("NOVEUM_ENVIRONMENT"),
)

# Approach 1: Context Managers (Recommended)
def process_data_with_context_manager(query: str):
    with trace_operation("data_processing", {"query": query}) as span:
        time.sleep(0.5)  # simulate work
        result = f"Processed: {query.upper()}"
        span.set_attributes({"result_length": len(result)})
        return result

# Approach 2: Manual Span Creation (Legacy Code)
def process_data_with_manual_spans(query: str):
    client = get_client()

    # Create a trace if none exists
    trace = None
    if not noveum_trace.core.context.get_current_trace():
        trace = client.start_trace("manual_trace")

    # Create a span for the operation
    span = client.start_span(
        name="legacy_function",
        attributes={
            "function.name": "process_data_with_manual_spans",
            "function.query": query,
        },
    )

    try:
        time.sleep(0.5)  # simulate some work
        result = f"Processed: {query.upper()}"

        # Add result attributes
        span.set_attributes({
            "function.result": result,
            "function.duration_ms": 500,
        })
        span.set_status("ok")
        return result
    except Exception as e:
        span.record_exception(e)
        span.set_status("error", str(e))
        raise
    finally:
        # Always finish the span
        client.finish_span(span)
        # Finish the trace if we created one
        if trace:
            client.finish_trace(trace)

# Demo
if __name__ == "__main__":
    # Approach 1: Context Manager
    result1 = process_data_with_context_manager("user input")
    print(f"Context Manager: {result1}")

    # Approach 2: Manual Spans
    result2 = process_data_with_manual_spans("legacy system query")
    print(f"Manual Spans: {result2}")

Approach 3: Mixed Approach for Complex Workflows
For complex workflows, you can combine multiple tracing approaches for maximum flexibility.
import os
import time

from openai import OpenAI

import noveum_trace
from noveum_trace import get_client
from noveum_trace.context_managers import trace_llm, trace_operation

noveum_trace.init(
    api_key=os.getenv("NOVEUM_API_KEY"),
    project=os.getenv("NOVEUM_PROJECT"),
    environment=os.getenv("NOVEUM_ENVIRONMENT"),
)

openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def complex_rag_workflow(user_input: str):
    """Demonstrate a mixed approach for complex RAG workflows."""
    # Start a trace for the entire workflow
    with trace_operation("rag_workflow") as workflow_span:
        workflow_span.set_attributes({
            "workflow.input": user_input,
            "workflow.start_time": time.time(),
        })
        results = {}

        # Step 1: Use a context manager for query preprocessing
        with trace_operation("query_preprocessing") as process_span:
            time.sleep(0.2)  # simulate query preprocessing
            processed_query = user_input.strip().lower()
            process_span.set_attributes({
                "process.input_length": len(user_input),
                "process.output_length": len(processed_query),
            })
            results["processed_query"] = processed_query

        # Step 2: Generate embeddings with the LLM context manager
        with trace_llm(model="text-embedding-ada-002", operation="embedding_generation") as embedding_span:
            time.sleep(0.3)  # simulate embedding generation
            embeddings = [0.1, 0.2, 0.3]  # Mock embeddings
            embedding_span.set_attributes({
                "embedding.model": "text-embedding-ada-002",
                "embedding.dimensions": len(embeddings),
                "embedding.query": processed_query,
            })
            results["embeddings"] = embeddings

        # Step 3: Use a manual span for vector search (legacy system)
        noveum_client = get_client()
        search_span = noveum_client.start_span(
            name="vector_search",
            parent_span_id=workflow_span.span_id,
            attributes={"search.operation": "vector_similarity"},
        )
        try:
            time.sleep(0.4)  # simulate vector search
            documents = [
                {"id": 1, "content": "Paris is the capital of France", "score": 0.95},
                {"id": 2, "content": "France is a country in Europe", "score": 0.85},
            ]
            search_span.set_attributes({
                "search.results_count": len(documents),
                "search.top_score": documents[0]["score"] if documents else 0,
                "search.query": processed_query,
            })
            results["documents"] = documents
            search_span.set_status("ok")
        except Exception as e:
            search_span.record_exception(e)
            search_span.set_status("error", str(e))
            raise
        finally:
            noveum_client.finish_span(search_span)

        # Step 4: Generate the final answer with the LLM context manager
        with trace_llm(model="gpt-4", operation="answer_generation") as answer_span:
            context = "\n".join([doc["content"] for doc in documents])
            response = openai_client.chat.completions.create(
                model="gpt-4",
                messages=[
                    {"role": "system", "content": f"Answer based on this context: {context}"},
                    {"role": "user", "content": processed_query},
                ],
            )
            final_answer = response.choices[0].message.content
            answer_span.set_attributes({
                "llm.input_tokens": response.usage.prompt_tokens,
                "llm.output_tokens": response.usage.completion_tokens,
                "llm.total_tokens": response.usage.total_tokens,
                "llm.context_length": len(context),
                "llm.answer_length": len(final_answer),
            })
            results["final_answer"] = final_answer

        # Update the workflow span with final results
        workflow_span.set_attributes({
            "workflow.end_time": time.time(),
            "workflow.steps_completed": 4,
            "workflow.success": True,
            "workflow.answer_length": len(final_answer),
        })
        return results

# Usage
result = complex_rag_workflow("What is the capital of France?")
print(f"Answer: {result['final_answer']}")

Approach 4: LangChain Integration (Automatic Tracing)
For LangChain applications, you can use the NoveumTraceCallbackHandler to automatically trace all LangChain operations including LLM calls, chains, agents, tools, and retrieval without modifying your existing code.
Installation:
pip install noveum-trace[langchain]
pip install langchain langchain-openai langchain-community

Basic Setup:
import os

from dotenv import load_dotenv

import noveum_trace
from noveum_trace import NoveumTraceCallbackHandler

load_dotenv()

# Initialize Noveum Trace
noveum_trace.init(
    api_key=os.getenv("NOVEUM_API_KEY"),
    project="my-langchain-app",
    environment="production",
    transport_config={"batch_size": 1, "batch_timeout": 5.0},
)

Example 1: Basic LLM Tracing
from langchain_openai import ChatOpenAI

def example_basic_llm_tracing():
    """Example: Basic LLM call tracing."""
    # Initialize Noveum Trace with batch size 1
    noveum_trace.init(
        api_key=os.getenv("NOVEUM_API_KEY"),
        project="my-langchain-app",
        environment="production",
        transport_config={"batch_size": 1, "batch_timeout": 5.0},
    )

    # Create the callback handler
    callback_handler = NoveumTraceCallbackHandler()

    # Create an LLM with the callback attached
    llm = ChatOpenAI(
        model="gpt-3.5-turbo",
        temperature=0.7,
        callbacks=[callback_handler],
    )

    # Make an LLM call - this will be automatically traced
    response = llm.invoke("What is the capital of France?")
    print(f"Response: {response.content}")

Example 2: Chain Tracing with Multiple Steps
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

def example_chain_tracing():
    """Example: Chain tracing with multiple steps."""
    # Create the callback handler
    callback_handler = NoveumTraceCallbackHandler()

    # Create a prompt template
    prompt = PromptTemplate(
        input_variables=["topic"],
        template="Write a brief summary about {topic}:",
    )

    # Create the LLM
    llm = ChatOpenAI(
        model="gpt-3.5-turbo",
        temperature=0.5,
        callbacks=[callback_handler],
    )

    # Create the chain
    chain = LLMChain(
        llm=llm,
        prompt=prompt,
        callbacks=[callback_handler],
    )

    # Run the chain - this creates a trace with nested spans
    result = chain.run(topic="artificial intelligence")
    print(f"Chain result: {result[:100]}...")

Example 3: Agent with Tool Usage
from langchain.agents import AgentType, initialize_agent
from langchain.tools import Tool
from langchain_openai import ChatOpenAI

def example_tool_usage():
    """Example: Tool usage tracing."""
    # Create the callback handler
    callback_handler = NoveumTraceCallbackHandler()

    # Define a custom tool
    def calculator(expression: str) -> str:
        """Simple calculator tool (demo only - eval is unsafe on untrusted input)."""
        try:
            result = eval(expression)
            return f"The result is: {result}"
        except Exception as e:
            return f"Error: {str(e)}"

    # Create the tools
    tools = [
        Tool(
            name="Calculator",
            func=calculator,
            description="Use this to perform mathematical calculations",
        )
    ]

    # Create the LLM
    llm = ChatOpenAI(
        model="gpt-3.5-turbo",
        temperature=0,
        callbacks=[callback_handler],
    )

    # Create the agent
    agent = initialize_agent(
        tools=tools,
        llm=llm,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        callbacks=[callback_handler],
        verbose=True,
    )

    # Use the agent with tools
    result = agent.run("Calculate 15 * 23")
    print(f"Agent result: {result}")

Example 4: Error Handling
def example_error_handling():
    """Example: Error handling in tracing."""
    # Create the callback handler
    callback_handler = NoveumTraceCallbackHandler()

    # Create an LLM with an invalid API key to trigger an error
    llm = ChatOpenAI(
        model="gpt-3.5-turbo",
        api_key="invalid-key",
        callbacks=[callback_handler],
    )

    try:
        # This should fail and be traced as an error
        llm.invoke("This will fail")
    except Exception as e:
        print(f"Expected error occurred: {type(e).__name__}")
        print("Error was traced and recorded in span")

Complete LangChain Integration Example:
"""
LangChain Integration Example for Noveum Trace SDK.
This example demonstrates how to use the NoveumTraceCallbackHandler to automatically
trace LangChain operations including LLM calls, chains, agents, tools, and retrieval.
Prerequisites:
pip install noveum-trace[langchain]
pip install langchain langchain-openai langchain-community
Environment Variables:
NOVEUM_API_KEY: Your Noveum API key
OPENAI_API_KEY: Your OpenAI API key (for LLM examples)
"""
import os
from dotenv import load_dotenv
import noveum_trace
from noveum_trace import NoveumTraceCallbackHandler
load_dotenv()
def main():
"""Run all LangChain examples."""
print("Noveum Trace - LangChain Integration Examples")
print("=" * 50)
# Check if API keys are set
if not os.getenv("NOVEUM_API_KEY"):
print("Warning: NOVEUM_API_KEY not set. Using mock mode.")
if not os.getenv("OPENAI_API_KEY"):
print("Warning: OPENAI_API_KEY not set. Some examples may fail.")
print()
# Run examples
example_basic_llm_tracing()
example_chain_tracing()
example_tool_usage()
example_error_handling()
print("\n=== Examples Complete ===")
print("Check your Noveum dashboard to see the traced operations!")
# Flush any pending traces
noveum_trace.flush()
if __name__ == "__main__":
main()Key Benefits of LangChain Integration:
- Zero Code Changes: Add tracing to existing LangChain code with just callback handlers (see the sketch after this list)
- Automatic Instrumentation: Traces LLM calls, chains, agents, tools, and retrieval automatically
- Rich Metadata: Captures model information, token usage, tool calls, and more
- Error Tracking: Automatically records errors and exceptions in traces
- Hierarchical Spans: Creates proper parent-child relationships for complex workflows
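If you cannot change where a component is constructed, the handler can also be attached at call time through LangChain's standard runnable config. A minimal sketch, using the same ChatOpenAI model as the examples above:

from langchain_openai import ChatOpenAI

from noveum_trace import NoveumTraceCallbackHandler

# An existing component, constructed without any tracing code
llm = ChatOpenAI(model="gpt-3.5-turbo")

# Attach the handler per call via the runnable config instead of the constructor
response = llm.invoke(
    "What is the capital of France?",
    config={"callbacks": [NoveumTraceCallbackHandler()]},
)
print(response.content)

This is standard LangChain behavior: callbacks passed in the config apply to that run only, so your existing construction code stays untouched.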
🔧 Framework Integrations
For comprehensive framework-specific integration guides, see our detailed documentation:
- LangChain Integration - Complete LangChain integration guide
- LangGraph Integration - LangGraph agent workflows
- Simple Integration Examples - Basic LLM and agent examples
📊 Advanced Features
Custom Attributes & Events
import time
from datetime import datetime

from noveum_trace.context_managers import trace_llm, trace_operation

def handle_user_request(user_id: str, request: str):
    """Handle a user request with comprehensive tracing and custom attributes."""
    # Use a context manager for the main operation
    with trace_operation("user_interaction") as span:
        # Add custom attributes
        span.set_attributes({
            "user.id": user_id,
            "user.plan": get_user_plan(user_id),
            "request.category": classify_request(request),
            "request.length": len(request),
        })

        # Add events
        span.add_event("request.received", {
            "timestamp": datetime.now().isoformat(),
            "request.length": len(request),
            "user.id": user_id,
        })

        try:
            # Process the request with nested tracing
            with trace_operation("request_processing") as process_span:
                process_span.set_attributes({
                    "process.stage": "initialization",
                    "process.user_id": user_id,
                })

                time.sleep(0.1)  # simulate some processing

                # Add a processing event
                process_span.add_event("processing.started", {
                    "timestamp": datetime.now().isoformat(),
                    "request_type": classify_request(request),
                })

                # Simulate an LLM call for request analysis
                with trace_llm(model="gpt-3.5-turbo", operation="request_analysis") as llm_span:
                    # Mock LLM call for demonstration
                    analysis_result = f"Analyzed request: {request[:50]}..."
                    llm_span.set_attributes({
                        "llm.model": "gpt-3.5-turbo",
                        "llm.operation": "request_analysis",
                        "llm.analysis_length": len(analysis_result),
                    })
                    llm_span.add_event("analysis.completed", {
                        "timestamp": datetime.now().isoformat(),
                        "analysis.result": analysis_result,
                    })

                # Complete processing
                result = process_request(request)
                process_span.set_attributes({
                    "process.success": True,
                    "process.result_length": len(result),
                })
                process_span.add_event("processing.completed", {
                    "timestamp": datetime.now().isoformat(),
                    "success": True,
                    "response.length": len(result),
                })

            # Add a success event to the main span
            span.add_event("request.completed", {
                "success": True,
                "response.length": len(result),
                "timestamp": datetime.now().isoformat(),
            })
            span.set_attributes({
                "request.success": True,
                "request.response_length": len(result),
            })
            return result
        except Exception as e:
            # Add an error event
            span.add_event("request.failed", {
                "error.type": type(e).__name__,
                "error.message": str(e),
                "timestamp": datetime.now().isoformat(),
            })
            span.set_attributes({
                "request.success": False,
                "request.error": str(e),
                "request.error_type": type(e).__name__,
            })
            # Record the exception
            span.record_exception(e)
            raise

# Helper functions (mock implementations)
def get_user_plan(user_id: str) -> str:
    return "premium" if user_id.startswith("premium_") else "basic"

def classify_request(request: str) -> str:
    if "help" in request.lower():
        return "support"
    elif "buy" in request.lower():
        return "purchase"
    else:
        return "general"

def process_request(request: str) -> str:
    # Mock processing
    return f"Processed: {request}"

# Usage
result = handle_user_request("user_123", "I need help with my account")
print(f"Result: {result}")

Sampling Configuration
# Configure sampling for production environments
noveum_trace.init(
    api_key="your-api-key",
    project="my-app",
    environment="production",
    sampling_rate=0.1,  # Sample 10% of traces by default
    sampling_rules=[
        {"trace_name": "health-check", "rate": 0.01},  # 1% for health checks
        {"trace_name": ".*error.*", "rate": 1.0},      # 100% for errors
        {"trace_name": ".*llm.*", "rate": 0.5},        # 50% for LLM calls
        {"trace_name": ".*rag.*", "rate": 0.2},        # 20% for RAG pipelines
    ],
)

# For development, you might want to sample everything
noveum_trace.init(
    api_key="your-api-key",
    project="my-app",
    environment="development",
    sampling_rate=1.0,  # Sample 100% in development
)

LangChain Integration
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

from noveum_trace.integrations.langchain import NoveumTraceCallbackHandler

# Initialize LangChain with Noveum tracing
llm = OpenAI(temperature=0.7)
callback_handler = NoveumTraceCallbackHandler()

# Create a chain with tracing
prompt = PromptTemplate(
    input_variables=["question"],
    template="Answer this question: {question}",
)
chain = LLMChain(llm=llm, prompt=prompt, callbacks=[callback_handler])

# Use the chain - automatically traced
result = chain.run("What is the capital of France?")
print(result)

Error Handling & Recovery
import os
import time

from openai import OpenAI

from noveum_trace.context_managers import trace_llm, trace_operation

def robust_llm_call(prompt: str, max_retries: int = 3):
    """LLM call with automatic retry and comprehensive error tracing."""
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    with trace_operation("robust_llm_call") as main_span:
        main_span.set_attributes({
            "operation.max_retries": max_retries,
            "operation.prompt_length": len(prompt),
        })

        for attempt in range(max_retries):
            try:
                with trace_llm(model="gpt-4", operation=f"llm_attempt_{attempt + 1}") as llm_span:
                    llm_span.set_attributes({
                        "llm.attempt": attempt + 1,
                        "llm.max_retries": max_retries,
                    })
                    try:
                        response = client.chat.completions.create(
                            model="gpt-4",
                            messages=[{"role": "user", "content": prompt}],
                            temperature=0.7,
                        )
                    except Exception as e:
                        # Record the failure while the span is still open
                        llm_span.set_attributes({
                            "llm.success": False,
                            "llm.error": str(e),
                            "llm.error_type": type(e).__name__,
                        })
                        raise

                    result = response.choices[0].message.content
                    llm_span.set_attributes({
                        "llm.input_tokens": response.usage.prompt_tokens,
                        "llm.output_tokens": response.usage.completion_tokens,
                        "llm.total_tokens": response.usage.total_tokens,
                        "llm.success": True,
                    })

                main_span.set_attributes({
                    "operation.success": True,
                    "operation.attempts_used": attempt + 1,
                    "operation.final_result_length": len(result),
                })
                return result
            except Exception as e:
                main_span.add_event("retry.attempt_failed", {
                    "attempt": attempt + 1,
                    "error": str(e),
                    "error_type": type(e).__name__,
                })

                if attempt == max_retries - 1:
                    # Final attempt failed
                    main_span.set_attributes({
                        "operation.success": False,
                        "operation.final_error": str(e),
                        "operation.attempts_used": max_retries,
                    })
                    main_span.record_exception(e)
                    raise

                # Wait before retrying
                time.sleep(2 ** attempt)  # Exponential backoff

# Usage
try:
    result = robust_llm_call("Explain quantum computing")
    print(f"Success: {result}")
except Exception as e:
    print(f"Failed after retries: {e}")

📈 View Your Data
Once integrated, visit your Noveum Dashboard to:
- 🔍 Search & Filter traces by any attribute
- 📊 Analyze Performance trends and bottlenecks
- 💰 Monitor Costs across different models and providers
- 🐛 Debug Issues with detailed trace timelines
- 👥 Collaborate with your team on insights
Next Steps
- Tracing Concepts - Learn about traces, spans, and observability best practices
- LangGraph Integration - Observe complex agent workflows
- Dashboard Guide - Master the Noveum platform interface
Get Early Access to Noveum.ai Platform
Be the first to get notified when we open the Noveum Platform to more users. All users get free access to the Observability suite; early users also get free eval jobs and premium support for the first year.