Python DSPy Agent框架：2026年LLM程式設計的5個致命坑及自動優化實戰

手寫Prompt的時代該結束了

你花3天調出一個完美的Prompt，換了個模型就全廢了；你精心設計的few-shot範例，在新版本模型上效果反而變差；你的Agent鏈路越來越長，每一步的Prompt都成了維護噩夢。2026年，DSPy（Declarative Self-improving Python） 讓LLM程式設計從「手寫Prompt」進化為「宣告式程式設計+自動優化」——你只需定義輸入輸出簽名，框架自動搜尋最優Prompt和微調策略。

本文將帶你從零構建一個基於DSPy的AI Agent，並解決生產環境中最常遇到的5個致命坑。

DSPy核心概念

概念	說明
Signature（簽名）	宣告式定義模組的輸入輸出，如`"question -> answer"`
Module（模組）	可組合的LLM呼叫單元，類似PyTorch的nn.Module
Teleprompter（優化器，新版文件稱Optimizer）	依據度量函式自動搜尋最優Prompt指令與few-shot範例的元件
Example（範例）	標準化的輸入輸出資料樣本
Metric（度量）	評估模組輸出品質的打分函式
Adapter（適配器）	將簽名轉換為具體LLM API呼叫的適配層

DSPy與傳統Prompt工程對比

對比維度	手寫Prompt	DSPy宣告式
開發方式	手動編寫、反覆試錯	宣告簽名、自動優化
模型遷移	需要重寫所有Prompt	只需更換Adapter
可維護性	低，Prompt散落各處	高，簽名即文件
優化效率	依賴人工經驗	自動搜尋最優解
多步推理	手動串聯，容易出錯	模組化組合，型別安全

問題分析：DSPy開發的5大挑戰

簽名設計不當：輸入輸出欄位命名模糊，導致LLM理解偏差
優化器選擇困難：BootstrapFewShot、MIPROv2等優化器適用場景不同
多步推理鏈斷裂：模組間資料傳遞型別不匹配，鏈路中途崩潰
度量函式不準：評估標準與業務目標不一致，優化方向跑偏
非同步並發陷阱：大批量優化時未控制並發，觸發API限流

分步實操：完整DSPy Agent實現

Step 1：環境搭建

pip install dspy-ai==2.6.0
pip install openai==1.35.0
pip install datasets==2.19.0

import os

import dspy

# Configure the default language model used by every DSPy module below.
# The API key is read from the OPENAI_API_KEY environment variable instead of
# being hard-coded, so the secret never ends up in source control; a missing
# variable fails immediately with KeyError rather than on the first LM call.
lm = dspy.LM(
    model="openai/gpt-4o-mini",
    api_key=os.environ["OPENAI_API_KEY"],
    temperature=0.7,
    max_tokens=2048,
)
dspy.configure(lm=lm)

Step 2：定義簽名與模組

# Signature for context-grounded question answering: `context` and `question`
# are the inputs, `answer` is the single output.
class QuestionAnswer(dspy.Signature):
    # The docstring and the `desc=` strings are runtime text, not documentation
    # only: `signature.__doc__` is formatted into the prompt by
    # LocalModelAdapter.format in this file. They are therefore left verbatim.
    # The docstring instructs the model to answer from the given context and
    # to reply '無法回答' when the context does not contain the answer.
    """根據給定的上下文資訊回答問題，如果上下文中沒有答案則回答'無法回答'。"""

    # Input: text expected to contain the answer.
    context: str = dspy.InputField(desc="包含答案的上下文文字")
    # Input: the question to answer.
    question: str = dspy.InputField(desc="需要回答的問題")
    # Output: a short answer based on the context.
    answer: str = dspy.OutputField(desc="基於上下文的簡短答案")


class RAGModule(dspy.Module):
    """Single-hop retrieval-augmented QA: retrieve passages, then answer.

    Args:
        num_passages: Number of passages requested from the retriever
            for each question.
    """

    def __init__(self, num_passages: int = 3):
        super().__init__()
        # NOTE(review): dspy.Retrieve presumably needs a retrieval model to be
        # configured; nothing visible in this file configures one — confirm.
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(QuestionAnswer)

    def forward(self, question: str) -> dspy.Prediction:
        """Answer `question` from retrieved passages.

        Args:
            question: The question to answer.

        Returns:
            A prediction with `context` (the retrieved passages, unchanged)
            and `answer` (the generated answer).
        """
        passages = self.retrieve(question).passages
        # QuestionAnswer declares `context: str`, so the passages are joined
        # into one newline-separated string before the call, the same way
        # MultiStepAgent.forward does. Passing the raw list would hand the
        # signature a value that does not match its declared type.
        prediction = self.generate_answer(
            context="\n".join(passages),
            question=question,
        )
        return dspy.Prediction(context=passages, answer=prediction.answer)

Step 3：構建多步推理Agent

# Signature for the decomposition step: one complex `question` in, a list of
# simpler `sub_questions` out.
class DecomposeQuestion(dspy.Signature):
    # The docstring and `desc=` strings are runtime text (signature.__doc__ is
    # formatted into prompts elsewhere in this file), so they stay verbatim.
    """將複雜問題分解為多個簡單的子問題。"""

    # Input: the complex question to break down.
    question: str = dspy.InputField(desc="需要分解的複雜問題")
    # Output: the resulting sub-questions, declared as a list of strings.
    sub_questions: list[str] = dspy.OutputField(desc="分解後的子問題列表")


# Signature for the synthesis step: combines the answers to the sub-questions
# into one final answer to the original question.
class SynthesizeAnswer(dspy.Signature):
    # The docstring and `desc=` strings are runtime text (signature.__doc__ is
    # formatted into prompts elsewhere in this file), so they stay verbatim.
    """根據多個子問題的答案綜合出最終答案。"""

    # Input: the original complex question.
    original_question: str = dspy.InputField(desc="原始複雜問題")
    # Input: one answer per sub-question, declared as a list of strings.
    sub_answers: list[str] = dspy.InputField(desc="各子問題的答案")
    # Output: the combined final answer.
    final_answer: str = dspy.OutputField(desc="綜合後的最終答案")


class MultiStepAgent(dspy.Module):
    """Multi-step QA agent: decompose, answer each sub-question, synthesize.

    Args:
        num_passages: Number of passages requested from the retriever
            for each sub-question.
    """

    def __init__(self, num_passages: int = 3):
        super().__init__()
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.decompose = dspy.ChainOfThought(DecomposeQuestion)
        self.sub_answer = dspy.ChainOfThought(QuestionAnswer)
        self.synthesize = dspy.ChainOfThought(SynthesizeAnswer)

    def forward(self, question: str) -> dspy.Prediction:
        """Answer a complex question through intermediate sub-questions.

        Args:
            question: The complex question to answer.

        Returns:
            A prediction with `sub_questions` (the sub-questions actually
            answered), `sub_answers` (one answer per sub-question, same
            order) and `answer` (the synthesized final answer).
        """
        decomposed = self.decompose(question=question)
        sub_questions = decomposed.sub_questions

        if isinstance(sub_questions, str):
            # A bare string would be iterated character by character below,
            # costing one retrieval and one LM call per character. Treat each
            # non-blank line as one sub-question instead.
            sub_questions = [
                line.strip() for line in sub_questions.splitlines() if line.strip()
            ]
        if not sub_questions:
            # With no sub-questions the synthesis step would receive no
            # evidence at all; fall back to answering the question directly.
            sub_questions = [question]

        sub_answers = []
        for sub_q in sub_questions:
            passages = self.retrieve(sub_q).passages
            sub_pred = self.sub_answer(context="\n".join(passages), question=sub_q)
            sub_answers.append(sub_pred.answer)

        final = self.synthesize(
            original_question=question,
            sub_answers=sub_answers,
        )
        return dspy.Prediction(
            sub_questions=sub_questions,
            sub_answers=sub_answers,
            answer=final.final_answer,
        )

Step 4：定義度量函式

def answer_exact_match(example: dspy.Example, prediction: dspy.Prediction, trace=None) -> float:
    """Exact-match metric.

    Both answers are compared after stripping surrounding whitespace and
    lower-casing.

    Args:
        example: Object whose `answer` attribute is the gold answer.
        prediction: Object whose `answer` attribute is the predicted answer.
        trace: Accepted for metric-signature compatibility; not used.

    Returns:
        1.0 when the normalised answers are equal, otherwise 0.0.
    """
    gold = example.answer.strip().lower()
    guess = prediction.answer.strip().lower()
    return 1.0 if gold == guess else 0.0

def answer_f1_score(example: dspy.Example, prediction: dspy.Prediction, trace=None) -> float:
    """Token-level F1 between the predicted and the gold answer.

    Tokens are compared as sets, so repeated tokens count once. Text is
    stripped and lower-cased first. Whitespace-separated words are tokens,
    and every CJK ideograph is a token of its own. Without the per-character
    split, an unsegmented Chinese answer would be a single token and the
    score would collapse to exact match. Scores for whitespace-separated,
    non-CJK text are unchanged.

    Args:
        example: Object whose `answer` attribute is the gold answer.
        prediction: Object whose `answer` attribute is the predicted answer.
        trace: Accepted for metric-signature compatibility; not used.

    Returns:
        The F1 score in [0.0, 1.0]. If either side has no tokens, 1.0 when
        both are empty and 0.0 otherwise.
    """
    import re  # local import keeps the snippet self-contained

    # One CJK ideograph, or a run of characters that are neither whitespace
    # nor CJK ideographs.
    token_pattern = r"[\u3400-\u9fff\uf900-\ufaff]|[^\s\u3400-\u9fff\uf900-\ufaff]+"

    def _tokens(text: str) -> set[str]:
        return set(re.findall(token_pattern, text.strip().lower()))

    pred_tokens = _tokens(prediction.answer)
    gold_tokens = _tokens(example.answer)
    if not pred_tokens or not gold_tokens:
        return float(pred_tokens == gold_tokens)

    common = pred_tokens & gold_tokens
    if not common:
        return 0.0

    precision = len(common) / len(pred_tokens)
    recall = len(common) / len(gold_tokens)
    return 2 * precision * recall / (precision + recall)

Step 5：自動優化

from dspy.teleprompt import BootstrapFewShot, MIPROv2

# Illustrative training set built from (question, gold answer) pairs; only
# `question` is marked as an input field.
trainset = [
    dspy.Example(question=q, answer=a).with_inputs("question")
    for q, a in (
        ("DSPy是什麼?", "一個宣告式LLM程式設計框架"),
        ("LoRA的作用?", "降低大模型微調顯存需求"),
        ("RAG的全稱?", "Retrieval-Augmented Generation"),
    )
]

# Few-shot bootstrapping optimizer, scored with the exact-match metric.
optimizer_fewshot = BootstrapFewShot(
    max_rounds=3,
    max_labeled_demos=4,
    max_bootstrapped_demos=4,
    metric=answer_exact_match,
)

# Compile a fresh RAGModule against the training set.
optimized_module = optimizer_fewshot.compile(RAGModule(), trainset=trainset)

# MIPROv2 optimizer, scored with the token-level F1 metric.
optimizer_mipro = MIPROv2(
    metric=answer_f1_score,
    num_threads=4,
    max_bootstrapped_demos=4,
    max_labeled_demos=4,
    num_candidates=10,
)

# `num_trials` is an argument of `compile()`, not of the MIPROv2 constructor,
# in the DSPy release pinned by this article; passing it to the constructor
# raises TypeError for an unexpected keyword.
# NOTE(review): newer DSPy releases default to an `auto` budget that may not
# be combined with explicit `num_candidates` / `num_trials` — confirm against
# the installed version.
fully_optimized = optimizer_mipro.compile(
    RAGModule(),
    trainset=trainset,
    num_trials=20,
)

Step 6：評估與部署

from dspy.evaluate import Evaluate

# Build an evaluator that scores a program with the F1 metric over `devset`,
# using 4 threads, a progress display and a 5-row result table.
# NOTE(review): `devset` is the same `trainset` list that was passed to
# `compile()`, so the reported score is measured on training data — consider
# a held-out split.
evaluator = Evaluate(
    devset=trainset,
    metric=answer_f1_score,
    num_threads=4,
    display_progress=True,
    display_table=5,
)

# Evaluate the MIPROv2-compiled program and print the score.
score = evaluator(fully_optimized)
print(f"優化後F1分數: {score:.2f}")

# Run one ad-hoc query through the compiled program and print its answer.
result = fully_optimized(question="DSPy框架的核心優勢是什麼?")
print(f"答案: {result.answer}")

避坑指南

坑1：簽名欄位描述缺失

# ❌ 錯誤：沒有描述，LLM不知道輸出格式
# Deliberately under-specified signature (the "wrong" half of this pitfall):
# the class has no docstring and neither field carries a `desc=`.
class BadSig(dspy.Signature):
    question: str = dspy.InputField()
    answer: str = dspy.OutputField()

# ✅ 正確：新增詳細描述，引導LLM輸出
# Well-specified counterpart to BadSig: same two fields, plus an instruction
# docstring and a `desc=` on each field.
class GoodSig(dspy.Signature):
    # The instruction previously opened with "根據上下文" (answer "based on the
    # context"), but this signature has no context input, so the instruction
    # referred to data the model never receives. It now matches the declared
    # fields. The docstring is runtime prompt text and stays in Chinese.
    """回答問題，答案不超過50字。"""
    # Input: the user's question.
    question: str = dspy.InputField(desc="使用者提出的問題")
    # Output: a concise answer of at most 50 characters.
    answer: str = dspy.OutputField(desc="簡潔準確的答案，不超過50字")

坑2：優化器訓練集過少

# ❌ 錯誤：訓練集不足，優化器無法學到有效模式
# Single-example training set; only `question` is marked as an input field.
trainset = [dspy.Example(question="1+1=?", answer="2").with_inputs("question")]

# ✅ 正確：至少50-200條高品質訓練資料
# NOTE(review): `load_training_data` is not defined anywhere in this file; it
# is presumably a user-supplied loader returning at least `min_size` examples.
trainset = load_training_data(min_size=50)

坑3：度量函式過於寬鬆

# ❌ 錯誤：永遠回傳1.0，優化器無法區分好壞
def bad_metric(example, prediction, trace=None):
    """Degenerate metric: ignores every argument and always scores 1.0."""
    constant_score = 1.0
    return constant_score

# ✅ 正確：使用有區分度的度量
def good_metric(example, prediction, trace=None):
    """Discriminating metric: delegates to `answer_f1_score` unchanged."""
    score = answer_f1_score(example, prediction, trace)
    return score

坑4：模組間型別不匹配

# ❌ 錯誤：子模組回傳list，下游期望str
# Upstream signature of the mismatch example: its only declared field is an
# output typed as a list of strings.
class StepA(dspy.Signature):
    # NOTE(review): if the prediction object exposes a dict-style `items()`
    # method, attribute access `.items` may return that method instead of
    # this field — confirm, or pick a different field name.
    items: list[str] = dspy.OutputField()

# Downstream signature of the mismatch example: its only declared field is an
# input typed as a single string.
class StepB(dspy.Signature):
    text: str = dspy.InputField()

# ✅ 正確：在forward中做型別轉換
def forward(self, question):
    """Run step A, convert its list output to text, then run step B.

    Args:
        question: The question forwarded to `self.step_a`.

    Returns:
        Whatever `self.step_b` returns for the newline-joined items.
    """
    result_a = self.step_a(question=question)
    # Read the field by key rather than by attribute. On a dict-like
    # prediction, `.items` resolves to the `items()` method, and joining a
    # method raises TypeError; key access always returns the stored field.
    joined = "\n".join(result_a["items"])
    result_b = self.step_b(text=joined)
    return result_b

坑5：未處理LLM輸出解析失敗

# ❌ 錯誤：直接存取輸出欄位，可能拋出異常
# Unguarded variant: there is no try/except here, so any exception raised by
# the module call or by the `.answer` access propagates to the caller.
prediction = self.module(question=q)
answer = prediction.answer

# ✅ 正確：新增異常處理和預設值
try:
    prediction = self.module(question=q)
    # A falsy answer (for example an empty string) falls back to the default.
    answer = prediction.answer or "無法回答"
except Exception as err:
    # The failure is reported through the answer text instead of being raised.
    answer = f"處理失敗: {err}"

報錯排查

序號	報錯訊息	原因	解決方法
1	`AssertionError: Signature must have at least one output field`	簽名缺少輸出欄位	確保Signature至少有一個OutputField
2	`TypeError: Expected str, got list`	模組間型別不匹配	在forward中做型別轉換
3	`dspy.primitives.assertions.AssertionError`	斷言條件不滿足	檢查dspy.Assert的條件邏輯
4	`openai.RateLimitError`	API呼叫頻率超限	減小num_threads或新增重試邏輯
5	`KeyError: 'answer'`	LLM輸出未包含預期欄位	檢查簽名定義，新增欄位描述
6	`ValueError: No demos were bootstrapped`	訓練集品質不足	增加訓練資料，檢查度量函式
7	`JSONDecodeError`	LLM輸出非JSON格式	使用dspy.ChainOfThought替代dspy.Predict
8	`AttributeError: module has no attribute 'retrieve'`	模組未初始化檢索器	確保在__init__中初始化所有子模組
9	`TimeoutError: LLM call timed out`	LLM回應逾時	調大timeout引數，或減小max_tokens以縮短生成時間
10	`ImportError: cannot import name 'MIPROv2'`	DSPy版本過低	升級到dspy-ai>=2.5.0

進階最佳化

1. 自訂Adapter支援本地模型

class LocalModelAdapter(dspy.Adapter):
    """Plain-text adapter using a `key: value` line format.

    `format` renders the signature instructions, the demos and the inputs
    into one prompt string. `parse` reads `key: value` lines back out of the
    completion.
    """

    def format(self, signature, demos, inputs):
        """Build the prompt string.

        Args:
            signature: Signature whose `__doc__` is the task instruction and
                whose `output_fields` mapping names the expected outputs.
            demos: Iterable of mappings, each rendered as `key: value` lines.
            inputs: Mapping of input field names to values.

        Returns:
            The prompt as a single string.
        """
        # Pieces are collected and joined once instead of growing a string
        # with repeated `+=`. The resulting text is unchanged.
        parts = [f"任務: {signature.__doc__}\n\n"]
        for demo in demos:
            parts.extend(f"{key}: {val}\n" for key, val in demo.items())
            parts.append("\n")
        parts.extend(f"{key}: {val}\n" for key, val in inputs.items())
        parts.append("\n請輸出:\n")
        # Only the field names are needed here, so iterate the keys.
        parts.extend(f"{field_name}: " for field_name in signature.output_fields)
        return "".join(parts)

    def parse(self, signature, completion):
        """Extract the declared output fields from the completion text.

        Only lines whose key is one of `signature.output_fields` are kept, so
        echoed inputs or stray `key: value` lines do not leak into the
        result. If a field appears more than once, the last occurrence wins.

        Args:
            signature: Signature whose `output_fields` mapping names the
                accepted keys.
            completion: Raw completion text.

        Returns:
            A dict mapping each output field found to its stripped value.
        """
        expected = signature.output_fields
        outputs = {}
        for line in completion.strip().split("\n"):
            key, sep, val = line.partition(":")
            key = key.strip()
            if sep and key in expected:
                outputs[key] = val.strip()
        return outputs

2. 斷言驅動的輸出約束

class ConstrainedQA(dspy.Module):
    """QA module whose answer must be non-empty and at most 200 characters."""

    def __init__(self):
        super().__init__()
        self.generate = dspy.ChainOfThought(QuestionAnswer)

    def forward(self, question: str, context: str) -> dspy.Prediction:
        """Generate an answer and check it against the length constraints.

        Args:
            question: The question to answer.
            context: The context text passed to the generator.

        Returns:
            The generator's prediction, unchanged.
        """
        result = self.generate(question=question, context=context)
        # Treat a missing (None) answer as empty, so it fails the "must not
        # be empty" assertion instead of raising TypeError from len(None).
        # NOTE(review): confirm that `dspy.Assert` is still available in the
        # installed DSPy version.
        answer = result.answer or ""
        dspy.Assert(
            bool(answer),
            "答案不能為空",
        )
        dspy.Assert(
            len(answer) <= 200,
            "答案不能超過200字",
        )
        return result

3. 快取最佳化減少API呼叫

import hashlib
import json

class CachedModule(dspy.Module):
    """In-memory memoising wrapper around another module.

    Results are stored in a dict keyed by a hash of the keyword arguments.
    A repeated call with equal arguments returns the stored result object
    without calling the wrapped module again.

    Args:
        module: The module to wrap.
        cache_dir: Stored on the instance as `cache_dir`. Nothing in this
            class reads or writes that directory; the cache is memory-only.
    """

    def __init__(self, module: dspy.Module, cache_dir: str = ".dspy_cache"):
        super().__init__()
        self.module = module
        self.cache_dir = cache_dir
        self.cache = {}

    def _cache_key(self, **kwargs):
        """Return a hex digest identifying this set of keyword arguments."""
        # `default=repr` lets values that JSON cannot serialise take part in
        # the key instead of raising TypeError. Keys for JSON-serialisable
        # arguments are unchanged. The hash is a lookup key only, not a
        # security measure.
        content = json.dumps(kwargs, sort_keys=True, default=repr)
        return hashlib.md5(content.encode()).hexdigest()

    def forward(self, **kwargs):
        """Return the cached result for `kwargs`, computing it on a miss."""
        key = self._cache_key(**kwargs)
        if key not in self.cache:
            self.cache[key] = self.module(**kwargs)
        return self.cache[key]

對比分析

維度	DSPy	LangChain	LlamaIndex	原生Prompt
程式設計範式	宣告式	命令式鏈式	命令式索引	手寫Prompt
自動優化	✅內建優化器	❌需手動	❌需手動	❌純手動
可重現性	✅簽名固定	⚠️依賴模板	⚠️依賴模板	❌難以重現
模型遷移	✅換Adapter	⚠️需改模板	⚠️需改模板	❌全部重寫
學習曲線	中等	低	低	低
生產就緒	✅型別安全	⚠️靈活但脆弱	✅RAG場景強	❌維護成本高
社群生態	快速成長	成熟	成熟	N/A

總結：DSPy不是「又一個LLM框架」，而是LLM程式設計範式的根本轉變——從「手寫Prompt」到「宣告式程式設計+自動優化」。它的核心價值在於：1）簽名即文件，消除Prompt維護噩夢；2）優化器自動搜尋最優Prompt，不再依賴人工經驗；3）模組化組合保證型別安全，多步推理鏈不再斷裂。2026年的DSPy實踐路徑：先用ChainOfThought+簽名快速驗證→再用BootstrapFewShot優化範例→最後用MIPROv2全量優化。關鍵是要有高品質的度量函式，它決定了優化的方向是否正確。

線上工具推薦

JSON格式化：/zh-TW/json/format
Base64編解碼：/zh-TW/encode/base64
Hash計算：/zh-TW/encode/hash
JWT解碼：/zh-TW/encode/jwt-decode