Python DSPy Agentフレームワーク：2026年LLMプログラミングの5つの致命的落とし穴と自動最適化実践

手書きプロンプトの時代は終わった

3日かけて完璧なプロンプトを作っても、モデルを変えると全く使えなくなる。慎重に設計したfew-shot例が、新バージョンのモデルでは逆に精度を悪化させる。Agentチェーンが長くなるほど、各ステップのプロンプトがメンテナンスの悪夢になる。2026年、DSPy（Declarative Self-improving Python）がLLMプログラミングを「手書きプロンプト」から「宣言型プログラミング+自動最適化」へ進化させます——入出力シグネチャを定義するだけで、フレームワークが最適なプロンプトとファインチューニング戦略を自動的に探索します。

本記事では、DSPyベースのAI Agentをゼロから構築し、本番環境で最もよく遭遇する5つの致命的落とし穴を解決します。

DSPyコア概念

概念	説明
Signature（シグネチャ）	モジュールの入出力を宣言的に定義、例：`"question -> answer"`
Module（モジュール）	合成可能なLLM呼び出しユニット、PyTorchのnn.Moduleに類似
Teleprompter（オプティマイザ）	最適なプロンプト/例を自動検索するオプティマイザ
Example（例）	標準化された入出力データサンプル
Metric（メトリック）	モジュール出力品質を評価するスコアリング関数
Adapter（アダプタ）	シグネチャを具体的なLLM API呼び出しに変換する適応層

DSPyと従来のプロンプトエンジニアリングの比較

比較次元	手書きプロンプト	DSPy宣言型
開発方式	手動記述、試行錯誤	シグネチャ宣言、自動最適化
モデル移行	全プロンプトの書き直し	Adapterの交換のみ
保守性	低、プロンプトが散在	高、シグネチャがドキュメント
最適化効率	人的経験に依存	最適解を自動検索
マルチステップ推論	手動チェーン、エラー発生しやすい	モジュール合成、型安全

問題分析：DSPy開発の5つの課題

シグネチャ設計の不備：入出力フィールド名が曖昧で、LLMの理解にズレが生じる
オプティマイザ選択の困難：BootstrapFewShot、MIPROv2など適用シナリオが異なる
マルチステップ推論チェーンの断絶：モジュール間のデータ渡しで型不一致、チェーン途中でクラッシュ
メトリック関数の不正確さ：評価基準とビジネス目標が不一致、最適化の方向が逸脱
非同期並行の落とし穴：大規模最適化時に並行制御なし、APIレート制限をトリガー

ステップバイステップ：完全DSPy Agent実装

Step 1：環境構築

pip install dspy-ai==2.6.0
pip install openai==1.35.0
pip install datasets==2.19.0

import os

import dspy

# Read the API key from the environment instead of hard-coding it in source.
# A missing OPENAI_API_KEY raises KeyError right here, so misconfiguration
# fails at startup rather than on the first LLM call.
lm = dspy.LM(
    model="openai/gpt-4o-mini",
    api_key=os.environ["OPENAI_API_KEY"],
    temperature=0.7,
    max_tokens=2048,
)
# Register the LM as the default for the DSPy modules defined below.
# NOTE(review): the modules below use dspy.Retrieve, which presumably also
# needs a retrieval model configured here — confirm against the DSPy docs.
dspy.configure(lm=lm)

Step 2：シグネチャとモジュールの定義

# Signature for context-grounded question answering: (context, question) -> answer.
# The docstring and desc= strings are deliberately left untranslated: they are
# presumably sent to the LLM as instructions, i.e. runtime text — confirm.
class QuestionAnswer(dspy.Signature):
    """与えられたコンテキスト情報に基づいて質問に回答する。コンテキストに答えがない場合は'回答不可'と答える。"""

    # Input: the text that should contain the answer.
    context: str = dspy.InputField(desc="答えを含むコンテキストテキスト")
    # Input: the question to answer.
    question: str = dspy.InputField(desc="回答すべき質問")
    # Output: a concise answer grounded in the context.
    answer: str = dspy.OutputField(desc="コンテキストに基づく簡潔な回答")


class RAGModule(dspy.Module):
    """Retrieve-then-answer pipeline: fetch passages for a question, then answer from them.

    Args:
        num_passages: Number of passages requested from the retriever per question.
    """

    def __init__(self, num_passages: int = 3):
        super().__init__()
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(QuestionAnswer)

    def forward(self, question: str) -> dspy.Prediction:
        """Answer ``question`` from retrieved passages.

        Args:
            question: The user question; also used as the retrieval query.

        Returns:
            A prediction with ``context`` (the retrieved passages, unchanged)
            and ``answer`` (the generated answer).
        """
        passages = self.retrieve(question).passages
        # QuestionAnswer declares ``context`` as ``str``, so join the passages
        # into one string, the same way MultiStepAgent does.
        prediction = self.generate_answer(
            context="\n".join(passages),
            question=question,
        )
        # The returned context stays the raw passage list so existing callers
        # that inspect individual passages keep working.
        return dspy.Prediction(context=passages, answer=prediction.answer)

Step 3：マルチステップ推論Agentの構築

# Signature that splits one complex question into a list of simpler sub-questions.
# The docstring and desc= strings are kept verbatim because they are presumably
# part of the prompt sent to the LLM — confirm.
class DecomposeQuestion(dspy.Signature):
    """複雑な質問を複数の単純なサブ質問に分解する。"""

    # Input: the complex question to decompose.
    question: str = dspy.InputField(desc="分解すべき複雑な質問")
    # Output: the resulting sub-questions, typed as a list of strings.
    sub_questions: list[str] = dspy.OutputField(desc="分解されたサブ質問のリスト")


# Signature that merges the answers to several sub-questions into one final answer.
# The docstring and desc= strings are kept verbatim because they are presumably
# part of the prompt sent to the LLM — confirm.
class SynthesizeAnswer(dspy.Signature):
    """複数のサブ質問の回答から最終回答を総合する。"""

    # Input: the original complex question.
    original_question: str = dspy.InputField(desc="元の複雑な質問")
    # Input: one answer per sub-question, typed as a list of strings.
    sub_answers: list[str] = dspy.InputField(desc="各サブ質問の回答")
    # Output: the synthesized final answer.
    final_answer: str = dspy.OutputField(desc="総合された最終回答")


class MultiStepAgent(dspy.Module):
    """Decompose a question, answer each sub-question with retrieval, then synthesize.

    Args:
        num_passages: Number of passages requested from the retriever per sub-question.
    """

    def __init__(self, num_passages: int = 3):
        super().__init__()
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.decompose = dspy.ChainOfThought(DecomposeQuestion)
        self.sub_answer = dspy.ChainOfThought(QuestionAnswer)
        self.synthesize = dspy.ChainOfThought(SynthesizeAnswer)

    def forward(self, question: str) -> dspy.Prediction:
        """Answer ``question`` through decomposition, per-part retrieval QA and synthesis.

        Args:
            question: The complex question to answer.

        Returns:
            A prediction with ``sub_questions`` (the sub-questions actually
            answered), ``sub_answers`` (one per sub-question, same order) and
            ``answer`` (the synthesized final answer).
        """
        decomposed = self.decompose(question=question)

        raw_sub_questions = decomposed.sub_questions or []
        # A bare string would otherwise be iterated character by character.
        if isinstance(raw_sub_questions, str):
            raw_sub_questions = [raw_sub_questions]
        sub_questions = [q for q in raw_sub_questions if q and str(q).strip()]
        # If decomposition produced nothing usable, answer the original
        # question directly instead of synthesizing from an empty answer list.
        if not sub_questions:
            sub_questions = [question]

        sub_answers = []
        for sub_q in sub_questions:
            passages = self.retrieve(sub_q).passages
            sub_pred = self.sub_answer(context="\n".join(passages), question=sub_q)
            sub_answers.append(sub_pred.answer)

        final = self.synthesize(
            original_question=question,
            sub_answers=sub_answers,
        )
        return dspy.Prediction(
            sub_questions=sub_questions,
            sub_answers=sub_answers,
            answer=final.final_answer,
        )

Step 4：メトリック関数の定義

def answer_exact_match(example: dspy.Example, prediction: dspy.Prediction, trace=None) -> float:
    """Return 1.0 when the predicted answer equals the gold answer, else 0.0.

    Comparison ignores surrounding whitespace and letter case.

    Args:
        example: Gold example; its ``answer`` attribute is the reference string.
        prediction: Model output; its ``answer`` attribute is compared to the gold.
        trace: Unused; accepted so the function fits the metric call signature.

    Returns:
        1.0 on a match, 0.0 otherwise. A prediction whose ``answer`` is missing
        or ``None`` scores 0.0 instead of raising.
    """
    predicted = getattr(prediction, "answer", None)
    if predicted is None:
        # A failed generation is simply a wrong answer; it must not abort scoring.
        return 0.0
    return float(
        example.answer.strip().lower() == str(predicted).strip().lower()
    )

def _f1_tokens(text: str) -> list:
    """Tokenize ``text`` for F1: each kana/kanji character is a token, other runs split on whitespace."""
    import re

    # Hiragana + katakana, CJK extension A, CJK unified ideographs, half-width katakana.
    cjk = "\u3040-\u30ff\u3400-\u4dbf\u4e00-\u9fff\uff66-\uff9f"
    return re.findall(f"[{cjk}]|[^\\s{cjk}]+", text.strip().lower())


def answer_f1_score(example: dspy.Example, prediction: dspy.Prediction, trace=None) -> float:
    """Return the token-level F1 between the predicted and gold answers.

    Text written without spaces (e.g. Japanese) is tokenized per character, so
    partially correct answers get partial credit instead of collapsing to exact
    match. Overlap is counted as a multiset, so repeated tokens are not
    over-credited.

    Args:
        example: Gold example; its ``answer`` attribute is the reference string.
        prediction: Model output; its ``answer`` attribute is scored.
        trace: Unused; accepted so the function fits the metric call signature.

    Returns:
        A float in [0.0, 1.0]. If either side has no tokens the score is 1.0
        only when both are empty. A missing or ``None`` predicted answer is
        treated as empty.
    """
    from collections import Counter

    predicted = getattr(prediction, "answer", None)
    pred_counts = Counter(_f1_tokens("" if predicted is None else str(predicted)))
    gold_counts = Counter(_f1_tokens(example.answer))
    if not pred_counts or not gold_counts:
        return float(pred_counts == gold_counts)
    # Counter intersection keeps the minimum count per token.
    overlap = sum((pred_counts & gold_counts).values())
    if overlap == 0:
        return 0.0
    precision = overlap / sum(pred_counts.values())
    recall = overlap / sum(gold_counts.values())
    return 2 * precision * recall / (precision + recall)

Step 5：自動最適化

from dspy.teleprompt import BootstrapFewShot, MIPROv2

# Tiny illustrative training set: each example marks ``question`` as the only
# input, leaving ``answer`` as the label the metrics compare against.
trainset = [
    dspy.Example(question="DSPyとは?", answer="宣言型LLMプログラミングフレームワーク").with_inputs("question"),
    dspy.Example(question="LoRAの役割は?", answer="大モデルファインチューニングのメモリ要件を削減").with_inputs("question"),
    dspy.Example(question="RAGの略称は?", answer="Retrieval-Augmented Generation").with_inputs("question"),
]

# Stage 1: bootstrap few-shot demonstrations, judged by exact match.
optimizer_fewshot = BootstrapFewShot(
    metric=answer_exact_match,
    max_bootstrapped_demos=4,
    max_labeled_demos=4,
    max_rounds=3,
)

optimized_module = optimizer_fewshot.compile(
    RAGModule(),
    trainset=trainset,
)

# Stage 2: MIPROv2 search, judged by token-level F1.
# The constructor takes the search-space settings; the trial budget
# (num_trials) is an argument of compile(), not of __init__.
optimizer_mipro = MIPROv2(
    metric=answer_f1_score,
    num_threads=4,
    max_bootstrapped_demos=4,
    max_labeled_demos=4,
    num_candidates=10,
)

# NOTE(review): three examples are presumably too few for MIPROv2's internal
# train/validation split — confirm, and use a larger trainset in practice.
fully_optimized = optimizer_mipro.compile(
    RAGModule(),
    trainset=trainset,
    num_trials=20,
)

Step 6：評価とデプロイ

from dspy.evaluate import Evaluate

# Build an evaluator that scores a program with the F1 metric on four threads,
# showing a progress bar and the first five rows of the result table.
# NOTE(review): devset is the same list used for training, so the reported
# score measures fit to the training examples, not held-out quality.
evaluator = Evaluate(
    devset=trainset,
    metric=answer_f1_score,
    num_threads=4,
    display_progress=True,
    display_table=5,
)

# Run the evaluation; the result is formatted below as a number with two decimals.
score = evaluator(fully_optimized)
print(f"最適化後F1スコア: {score:.2f}")

# Single ad-hoc query against the optimised program.
result = fully_optimized(question="DSPyフレームワークのコアアドバンテージは?")
print(f"回答: {result.answer}")

落とし穴ガイド

落とし穴1：シグネチャフィールドの説明不足

# ❌ Wrong: no descriptions, so the LLM cannot tell what output format is expected
class BadSig(dspy.Signature):
    # Fields carry neither a docstring nor desc= text (intentional counter-example).
    question: str = dspy.InputField()
    answer: str = dspy.OutputField()

# ✅ Correct: add detailed descriptions to guide the LLM output
# NOTE(review): the docstring mentions answering "based on context", yet this
# signature declares no context field — confirm whether one is intended.
class GoodSig(dspy.Signature):
    """コンテキストに基づいて質問に回答、50文字以内。"""
    # Input: the question asked by the user.
    question: str = dspy.InputField(desc="ユーザーが提起した質問")
    # Output: a concise, accurate answer of at most 50 characters.
    answer: str = dspy.OutputField(desc="簡潔で正確な回答、50文字以内")

落とし穴2：オプティマイザの訓練データ不足

# ❌ Wrong: too little training data; the optimizer cannot learn a useful pattern
trainset = [dspy.Example(question="1+1=?", answer="2").with_inputs("question")]

# ✅ Correct: at least 50-200 high-quality training examples
# NOTE(review): load_training_data is not defined in the visible part of this
# file; presumably a user-supplied loader — confirm.
trainset = load_training_data(min_size=50)

落とし穴3：メトリック関数が緩すぎる

# ❌ Wrong: always returns 1.0, so the optimizer cannot tell good from bad
def bad_metric(example, prediction, trace=None):
    """Counter-example metric: ignore every argument and return a constant score."""
    constant_score = 1.0
    return constant_score

# ✅ Correct: use a metric that can discriminate between outputs
def good_metric(example, prediction, trace=None):
    """Score a prediction by delegating to ``answer_f1_score`` with the same arguments."""
    f1 = answer_f1_score(example, prediction, trace)
    return f1

落とし穴4：モジュール間の型不一致

# ❌ Wrong: the sub-module returns a list while the downstream step expects str
# NOTE(review): an output field named ``items`` presumably collides with the
# mapping-style ``items()`` method on DSPy prediction objects — verify.
class StepA(dspy.Signature):
    # Output: a list of strings.
    items: list[str] = dspy.OutputField()

# Downstream signature whose single input is one string.
class StepB(dspy.Signature):
    # Input: plain text.
    text: str = dspy.InputField()

# ✅ Correct: convert types inside forward
def forward(self, question):
    """Run step A, join its list output into one string, and feed that to step B.

    Args:
        question: The question passed to ``self.step_a``.

    Returns:
        Whatever ``self.step_b`` returns for the joined text.
    """
    result_a = self.step_a(question=question)
    # Use item access: on a DSPy prediction, attribute access ``result_a.items``
    # resolves to the mapping-style ``items()`` method rather than the ``items``
    # output field, and joining a bound method raises TypeError.
    joined = "\n".join(result_a["items"])
    result_b = self.step_b(text=joined)
    return result_b

落とし穴5：LLM出力パース失敗の未処理

# ❌ Wrong: accesses the output field directly; an exception may be thrown
prediction = self.module(question=q)
answer = prediction.answer

# ✅ Correct: add exception handling and a default value
# NOTE(review): catching Exception and returning the error text as the answer
# makes a failure look like a normal answer to callers — consider narrowing
# the exception type and logging it instead.
try:
    prediction = self.module(question=q)
    # An empty or falsy answer is replaced by the fallback string.
    answer = prediction.answer if prediction.answer else "回答不可"
except Exception as e:
    answer = f"処理失敗: {str(e)}"

エラートラブルシューティング

#	エラーメッセージ	原因	解決方法
1	`AssertionError: Signature must have at least one output field`	シグネチャに出力フィールドなし	Signatureに少なくとも1つのOutputFieldを確保
2	`TypeError: Expected str, got list`	モジュール間の型不一致	forwardで型変換を実施
3	`dspy.primitives.assertions.AssertionError`	アサーション条件を満たしていない	dspy.Assertの条件ロジックを確認
4	`openai.RateLimitError`	API呼び出し頻度超過	num_threadsを減らすかリトライロジックを追加
5	`KeyError: 'answer'`	LLM出力に期待フィールドなし	シグネチャ定義を確認、フィールド説明を追加
6	`ValueError: No demos were bootstrapped`	訓練データ品質不足	訓練データを増加、メトリック関数を確認
7	`JSONDecodeError`	LLM出力がJSON形式でない	dspy.Predictの代わりにdspy.ChainOfThoughtを使用
8	`AttributeError: module has no attribute 'retrieve'`	モジュールがリトリーバを初期化していない	__init__で全サブモジュールを初期化
9	`TimeoutError: LLM call timed out`	LLMレスポンスタイムアウト	max_tokensを減らすか、timeoutパラメータを長めに設定
10	`ImportError: cannot import name 'MIPROv2'`	DSPyバージョンが低い	dspy-ai>=2.5.0にアップグレード

高度な最適化

1. カスタムAdapterでローカルモデル対応

class LocalModelAdapter(dspy.Adapter):
    """Plain-text adapter: renders a signature as ``name: value`` lines and parses the same shape back."""

    def format(self, signature, demos, inputs):
        """Build the prompt string for one call.

        Args:
            signature: Signature whose docstring is the task line and whose
                ``output_fields`` names are listed at the end of the prompt.
            demos: Iterable of mappings; each is rendered as ``key: value``
                lines followed by a blank line.
            inputs: Mapping of input field names to values for this call.

        Returns:
            The complete prompt as a single string.
        """
        # Collect fragments and join once instead of repeated string +=.
        parts = [f"タスク: {signature.__doc__}\n\n"]
        for demo in demos:
            parts.extend(f"{key}: {val}\n" for key, val in demo.items())
            parts.append("\n")
        parts.extend(f"{key}: {val}\n" for key, val in inputs.items())
        parts.append("\n出力してください:\n")
        parts.extend(f"{field_name}: " for field_name in signature.output_fields)
        return "".join(parts)

    def parse(self, signature, completion):
        """Extract the signature's output fields from a ``name: value`` completion.

        Only names declared in ``signature.output_fields`` (matched
        case-insensitively) start a field. Any other non-empty line is a
        continuation of the current field, so multi-line values and values
        containing a colon are kept intact.

        Args:
            signature: Signature whose ``output_fields`` define the accepted names.
            completion: Raw text returned by the model.

        Returns:
            Dict mapping each output field found to its stripped text.
        """
        declared = {name.lower(): name for name in signature.output_fields}
        outputs = {}
        current = None
        for line in completion.strip().split("\n"):
            head, sep, tail = line.partition(":")
            field_name = declared.get(head.strip().lower()) if sep else None
            if field_name is not None:
                outputs[field_name] = tail.strip()
                current = field_name
            elif current is not None and line.strip():
                # Not a declared field header: keep it as part of the current value.
                outputs[current] = f"{outputs[current]}\n{line.strip()}".strip()
        return outputs

2. アサーション駆動の出力制約

class ConstrainedQA(dspy.Module):
    """Question answering whose answer must be non-empty and at most 200 characters.

    NOTE(review): relies on ``dspy.Assert``; confirm it is available in the
    DSPy version this project pins.
    """

    def __init__(self):
        super().__init__()
        self.generate = dspy.ChainOfThought(QuestionAnswer)

    def forward(self, question: str, context: str) -> dspy.Prediction:
        """Generate an answer and assert the length constraints on it.

        Args:
            question: The question to answer.
            context: The text the answer should be grounded in.

        Returns:
            The prediction produced by the generator, unchanged.
        """
        prediction = self.generate(question=question, context=context)
        answer_length = len(prediction.answer)
        # Lower bound first, then upper bound, in the same order as the checks are reported.
        dspy.Assert(answer_length > 0, "回答は空にできません")
        dspy.Assert(answer_length <= 200, "回答は200文字を超えられません")
        return prediction

3. キャッシュ最適化でAPI呼び出し削減

import hashlib
import json

class CachedModule(dspy.Module):
    """Memoizing wrapper: repeat calls with the same keyword arguments reuse the first result.

    Args:
        module: The wrapped module; called with the same keyword arguments on a miss.
        cache_dir: Stored on the instance only; this class keeps its cache in
            memory and never reads or writes the directory.
    """

    def __init__(self, module: dspy.Module, cache_dir: str = ".dspy_cache"):
        super().__init__()
        self.module = module
        self.cache_dir = cache_dir
        # Maps a hex digest of the call arguments to the wrapped module's result.
        self.cache = {}

    def _cache_key(self, **kwargs):
        """Return a deterministic hex digest identifying ``kwargs``.

        Values JSON cannot encode fall back to ``repr`` so such arguments do
        not raise TypeError.
        """
        content = json.dumps(kwargs, sort_keys=True, default=repr)
        # SHA-256 instead of MD5: MD5 can be unavailable on FIPS-restricted builds.
        return hashlib.sha256(content.encode("utf-8")).hexdigest()

    def forward(self, **kwargs):
        """Return the cached result for ``kwargs``, calling the wrapped module on a miss."""
        key = self._cache_key(**kwargs)
        try:
            return self.cache[key]
        except KeyError:
            result = self.cache[key] = self.module(**kwargs)
            return result

比較分析

次元	DSPy	LangChain	LlamaIndex	生プロンプト
プログラミングパラダイム	宣言型	命令型チェーン	命令型インデックス	手書きプロンプト
自動最適化	✅内蔵オプティマイザ	❌手動	❌手動	❌完全手動
再現性	✅シグネチャ固定	⚠️テンプレート依存	⚠️テンプレート依存	❌再現困難
モデル移行	✅Adapter交換	⚠️テンプレート変更	⚠️テンプレート変更	❌全書き直し
学習曲線	中程度	低い	低い	低い
本番対応	✅型安全	⚠️柔軟だが脆弱	✅RAGシナリオ強	❌保守コスト高
コミュニティ	急成長中	成熟	成熟	N/A

まとめ：DSPyは「また別のLLMフレームワーク」ではなく、LLMプログラミングパラダイムの根本的転換です——「手書きプロンプト」から「宣言型プログラミング+自動最適化」へ。コアバリュー：1）シグネチャがそのままドキュメントになり、プロンプト保守の悪夢を排除；2）オプティマイザが最適なプロンプトを自動探索し、人的経験に依存しない；3）モジュール合成で型安全を保証し、マルチステップ推論チェーンが途切れない。2026年のDSPy実践パス：ChainOfThought+シグネチャで迅速に検証→BootstrapFewShotで例を最適化→MIPROv2でプログラム全体を最適化。鍵は高品質なメトリック関数——最適化の方向が正しいかどうかを決定します。

オンラインツール推奨

JSONフォーマッター：/ja/json/format
Base64エンコード/デコード：/ja/encode/base64
Hash計算：/ja/encode/hash
JWTデコード：/ja/encode/jwt-decode