Lab 10: DSPy Prompt Optimization¶
⏱️ Estimated completion time: 45 minutes
Overview¶
This lab demonstrates using DSPy for automatic prompt optimization in agent workflows, including signature definition, few-shot learning, and performance metrics.
Learning Objectives¶
- Understanding DSPy signatures and modules
- Implementing automatic prompt optimization
- Measuring and improving agent performance
Key Concepts¶
DSPy Framework¶
- Signatures: Formal specifications of input/output behavior
- Modules: Reusable components with learnable parameters
- Optimizers: Automatic prompt engineering and few-shot selection
Lab Code¶
#!/usr/bin/env python3
"""
DSPy Prompt Optimization Demo
Demonstrate automatic prompt optimization for agent tasks.
"""
try:
import dspy
from dspy import Signature, Module, ChainOfThought, Predict
from dspy.teleprompt import BootstrapFewShot
except ImportError:
print("DSPy not installed. Install with: pip install dspy-ai")
exit(1)
import random
from typing import List, Dict
# Configure DSPy with a mock LM for demo
class MockLM:
    """Stand-in language model that returns canned strings for the demo.

    NOTE(review): newer DSPy versions expect a ``dspy.LM``-compatible
    object; this mock only implements ``__call__`` — confirm it is
    sufficient for the installed DSPy version.
    """

    def __init__(self):
        # Canned outputs for each kind of prompt the demo issues.
        self.responses = {
            "sentiment": ["positive", "negative", "neutral"],
            "classification": ["travel", "weather", "general"],
            "summary": "This is a summary of the text.",
        }

    def __call__(self, prompt, **kwargs):
        """Return a mock completion chosen by keywords found in *prompt*."""
        lowered = prompt.lower()
        if "sentiment" in lowered:
            return random.choice(self.responses["sentiment"])
        if "classify" in lowered:
            return random.choice(self.responses["classification"])
        # Fallback: anything that is neither sentiment nor classification.
        return self.responses["summary"]
# Set up DSPy with mock LM
# NOTE(review): dspy.configure typically expects a dspy.LM instance; this
# plain-callable mock may not satisfy newer DSPy releases — confirm.
dspy.configure(lm=MockLM())
# Define DSPy Signatures
# NOTE: in DSPy the class docstring and the field `desc=` strings become
# part of the generated prompt — they are runtime behavior, not docs.
class SentimentAnalysis(Signature):
    """Analyze the sentiment of user input."""
    text = dspy.InputField(desc="The input text to analyze")
    sentiment = dspy.OutputField(desc="The sentiment: positive, negative, or neutral")
# NOTE: docstring and `desc=` strings are part of the DSPy prompt — do not
# edit them casually; they change model behavior.
class QueryClassification(Signature):
    """Classify user queries into categories."""
    query = dspy.InputField(desc="User query to classify")
    category = dspy.OutputField(desc="Category: travel, weather, or general")
# Three inputs (query plus the two upstream analysis results) feed one
# response output; docstring/desc strings are prompt text — runtime behavior.
class ResponseGeneration(Signature):
    """Generate appropriate responses based on query and sentiment."""
    query = dspy.InputField(desc="User query")
    sentiment = dspy.InputField(desc="Sentiment of the query")
    category = dspy.InputField(desc="Category of the query")
    response = dspy.OutputField(desc="Appropriate response to the user")
# Define DSPy Modules
class OptimizedAgent(Module):
"""Agent with optimizable prompt components."""
def __init__(self):
super().__init__()
self.sentiment_analyzer = ChainOfThought(SentimentAnalysis)
self.query_classifier = ChainOfThought(QueryClassification)
self.response_generator = Predict(ResponseGeneration)
def forward(self, user_input):
# Analyze sentiment
sentiment_result = self.sentiment_analyzer(text=user_input)
# Classify query
classification_result = self.query_classifier(query=user_input)
# Generate response
response_result = self.response_generator(
query=user_input,
sentiment=sentiment_result.sentiment,
category=classification_result.category
)
return {
"sentiment": sentiment_result.sentiment,
"category": classification_result.category,
"response": response_result.response
}
def create_training_data():
    """Return mock labeled examples for optimization.

    Each example is a dict with "user_input", "expected_sentiment",
    and "expected_category" keys.
    """
    # (text, sentiment, category) triples, expanded into dicts below.
    labeled = [
        ("I love traveling to new places!", "positive", "travel"),
        ("The weather is terrible today.", "negative", "weather"),
        ("How can I help you?", "neutral", "general"),
        ("I'm excited about my vacation!", "positive", "travel"),
    ]
    return [
        {
            "user_input": text,
            "expected_sentiment": sentiment,
            "expected_category": category,
        }
        for text, sentiment, category in labeled
    ]
def evaluate_agent(agent, test_data):
    """Evaluate agent performance on test data.

    Args:
        agent: Callable taking a user-input string and returning a dict
            with at least "sentiment" and "category" keys.
        test_data: List of dicts with "user_input", "expected_sentiment",
            and "expected_category" keys.

    Returns:
        Dict with "sentiment_accuracy", "category_accuracy", and
        "overall_score" (mean of the two accuracies), each in [0.0, 1.0].
        An empty test set yields all zeros rather than raising
        ZeroDivisionError (bug fix: the original divided by len(test_data)
        unconditionally).
    """
    total = len(test_data)
    if total == 0:
        # Guard against division by zero on an empty test set.
        return {
            "sentiment_accuracy": 0.0,
            "category_accuracy": 0.0,
            "overall_score": 0.0,
        }

    correct_sentiment = 0
    correct_category = 0
    for example in test_data:
        result = agent(example["user_input"])
        if result["sentiment"] == example["expected_sentiment"]:
            correct_sentiment += 1
        if result["category"] == example["expected_category"]:
            correct_category += 1

    sentiment_accuracy = correct_sentiment / total
    category_accuracy = correct_category / total
    return {
        "sentiment_accuracy": sentiment_accuracy,
        "category_accuracy": category_accuracy,
        "overall_score": (sentiment_accuracy + category_accuracy) / 2,
    }
def main():
    """Run the demo: baseline eval, (simulated) optimization, sample queries."""
    print("=== DSPy Prompt Optimization Demo ===")

    # Build data; the test set reuses the training set for this demo.
    train_examples = create_training_data()
    eval_examples = train_examples  # Using same data for demo

    agent = OptimizedAgent()

    print("\n1. Testing baseline agent:")
    baseline_metrics = evaluate_agent(agent, eval_examples)
    print(f"Baseline performance: {baseline_metrics}")

    def agent_metric(example, prediction):
        """Score a prediction: mean of sentiment and category correctness."""
        hits = (prediction["sentiment"] == example["expected_sentiment"]) + (
            prediction["category"] == example["expected_category"]
        )
        return hits / 2

    # Wrap the raw dicts as dspy.Example objects for the optimizer.
    dspy_examples = [
        dspy.Example(
            user_input=ex["user_input"],
            expected_sentiment=ex["expected_sentiment"],
            expected_category=ex["expected_category"],
        )
        for ex in train_examples
    ]

    print("\n2. Optimizing agent with DSPy:")
    optimizer = BootstrapFewShot(
        metric=agent_metric,
        max_bootstrapped_demos=2,
        max_labeled_demos=2,
    )
    # Real usage would run:
    #   optimized_agent = optimizer.compile(agent, trainset=dspy_examples)
    # The demo skips the compile step and reuses the baseline agent.
    print("Running DSPy optimization... (simulated)")
    optimized_agent = agent  # Mock optimization for demo

    print("\n3. Testing optimized agent:")
    optimized_metrics = evaluate_agent(optimized_agent, eval_examples)
    print(f"Optimized performance: {optimized_metrics}")

    delta = optimized_metrics["overall_score"] - baseline_metrics["overall_score"]
    print(f"\nImprovement: {delta:.2%}")

    print("\n4. Example agent interactions:")
    sample_queries = (
        "I can't wait for my trip to Paris!",
        "The rain is ruining my day.",
        "What time is it?",
    )
    for query in sample_queries:
        outcome = optimized_agent(query)
        print(f"\nQuery: {query}")
        print(f"Sentiment: {outcome['sentiment']}")
        print(f"Category: {outcome['category']}")
        print(f"Response: {outcome['response']}")


if __name__ == "__main__":
    main()
How to Run¶
- Save the code as 10_dspy_optimization.py
- Install the dependency: pip install dspy-ai
- Run it: python 10_dspy_optimization.py
Key Features¶
- Automatic Optimization: DSPy automatically improves prompts
- Modular Design: Reusable components with clear interfaces
- Performance Metrics: Systematic evaluation of improvements
- Few-shot Learning: Automatic example selection and optimization