Structured output & JSON schemas

Defining a schema in the system prompt

# System prompt for invoice extraction. It spells out the expected JSON shape
# field by field (with the format each value must use) and then lists output
# rules: JSON only, null for unknown fields, ISO 8601 dates.
system = """You are a data extraction assistant.
Always respond with valid JSON matching exactly this schema:

{
  "invoice_number": "string — the invoice ID exactly as printed",
  "vendor_name": "string — full legal company name",
  "invoice_date": "string — ISO 8601 format: YYYY-MM-DD",
  "due_date": "string — ISO 8601 format: YYYY-MM-DD",
  "subtotal": "number — in the invoice's currency, no currency symbol",
  "tax_amount": "number — total tax, 0 if none",
  "total_amount": "number — final total including tax",
  "currency_code": "string — ISO 4217: USD, EUR, GBP, JPY, etc.",
  "line_items": [
    {
      "description": "string",
      "quantity": "number",
      "unit_price": "number",
      "line_total": "number"
    }
  ]
}

Rules:
- Respond ONLY with the JSON object
- No preamble, no explanation, no markdown code fences
- If a field cannot be determined, use null (never omit the field)
- Dates must always be ISO 8601 (YYYY-MM-DD), never "March 15" or "15/03/2026"
"""

strict: true for guaranteed schema compliance

When using tool use for extraction, setting strict: true on the tool definition enforces schema compliance at the API level:

# Tool definition for schema-constrained invoice extraction.
# With "strict": True the API constrains the tool input to "input_schema",
# so every required field is present and correctly typed.
extraction_tool = {
    "name": "extract_invoice",
    "description": "Extract structured data from an invoice",
    "input_schema": {
        "type": "object",
        "properties": {
            "invoice_number": {"type": "string"},
            "vendor_name": {"type": "string"},
            # ISO 8601 calendar date: YYYY-MM-DD
            "invoice_date": {"type": "string", "pattern": "^\\d{4}-\\d{2}-\\d{2}$"},
            "total_amount": {"type": "number"},
            "currency_code": {
                "type": "string",
                "enum": ["USD", "EUR", "GBP", "JPY", "CAD", "AUD"],
            },
        },
        "required": [
            "invoice_number",
            "vendor_name",
            "invoice_date",
            "total_amount",
            "currency_code",
        ],
        # Strict mode requires closed object schemas: no keys beyond those
        # declared in "properties" may appear in the tool input.
        "additionalProperties": False,
    },
    "strict": True,  # API guarantees schema compliance
}

Validation retry loop: full implementation

import json
import jsonschema
from anthropic import Anthropic

# Module-level API client; extract_with_validation sends every request through it.
client = Anthropic()

def _format_validation_feedback(error: "jsonschema.ValidationError") -> str:
    """Turn a schema validation error into corrective feedback for the model.

    Args:
        error: The validation error raised for the model's parsed output.

    Returns:
        A multi-line message naming the failing field, the problem, the value
        the model supplied and, when the failing subschema itself declares an
        ``enum``, the allowed values.
    """
    field_path = " → ".join(str(part) for part in error.absolute_path) or "root"
    feedback = (
        f"Schema validation failed:\n"
        f"Field: {field_path}\n"
        f"Problem: {error.message}\n"
        f"You provided: {json.dumps(error.instance)}\n"
    )

    # Look for the "enum" key on the failing subschema only. A substring
    # search over its repr would also match enums nested deeper in the schema
    # (e.g. when a required field is missing at the root) and then report an
    # empty list of valid values. Subschemas may also be plain booleans.
    failing_schema = error.schema
    if isinstance(failing_schema, dict) and "enum" in failing_schema:
        feedback += f"Valid values are: {failing_schema['enum']}\n"

    return feedback


def extract_with_validation(document: str, schema: dict, max_retries: int = 3) -> dict:
    """Extract JSON from a document, re-prompting with error feedback until it validates.

    The model is called up to ``max_retries + 1`` times. After each reply that
    is not parseable JSON, or that fails ``schema``, the reply and a message
    describing the failure are appended to the conversation before retrying.

    Args:
        document: Text sent as the first user message.
        schema: JSON Schema the parsed reply is validated against; also passed
            to ``build_extraction_system_prompt`` to build the system prompt.
        max_retries: Number of corrective retries allowed after the first
            attempt.

    Returns:
        ``{"status": "success", "data": ...}`` when a reply parses and
        validates. Otherwise ``{"status": "failed", "requires_human_review":
        True, "error": ...}``, which also carries the last parsed reply under
        ``"data"`` when the failure was a schema violation rather than a
        parse error.
    """
    messages = [{"role": "user", "content": document}]
    system = build_extraction_system_prompt(schema)

    for attempt in range(max_retries + 1):
        is_last_attempt = attempt == max_retries

        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=2048,
            system=system,
            messages=messages,
        )
        raw_text = response.content[0].text

        # Parse JSON
        try:
            result = json.loads(raw_text)
        except json.JSONDecodeError as e:
            if is_last_attempt:
                return {"status": "failed", "requires_human_review": True,
                        "error": f"Invalid JSON: {e}"}
            # Replay the bad reply so the model sees what it is correcting.
            messages.append({"role": "assistant", "content": raw_text})
            messages.append({"role": "user", "content":
                f"Your response was not valid JSON. Parse error: {e}\n"
                f"Please respond with only a valid JSON object."})
            continue

        # Validate schema
        try:
            jsonschema.validate(result, schema)
        except jsonschema.ValidationError as e:
            if is_last_attempt:
                # Keep the parsed reply so a human reviewer can start from it.
                return {"status": "failed", "requires_human_review": True,
                        "data": result, "error": str(e)}

            # Specific, actionable error feedback
            error_feedback = _format_validation_feedback(e)
            messages.append({"role": "assistant", "content": raw_text})
            messages.append({"role": "user", "content":
                f"Your response failed validation. Please correct and regenerate.\n\n{error_feedback}"})
            continue

        return {"status": "success", "data": result}

    # Only reachable when max_retries is negative and the loop never ran;
    # still return the documented failure shape instead of None.
    return {"status": "failed", "requires_human_review": True,
            "error": "No extraction attempt was made: max_retries must be >= 0"}

Graceful degradation for persistent failures

def process_batch(documents: list) -> dict:
    """Run extraction over a batch, routing every failure to human review.

    Args:
        documents: Items that each provide ``"content"`` (the text to extract
            from) and ``"id"`` (used to label review entries).

    Returns:
        A dict with three lists: ``"processed"`` holds each successful
        document merged with its extracted ``"data"``; ``"needs_review"``
        holds one entry per failed document with its id, error and any
        partial data; ``"failed"`` is returned empty.
    """
    succeeded = []
    review_queue = []

    for document in documents:
        outcome = extract_with_validation(document["content"], SCHEMA)

        if outcome["status"] != "success":
            # Failed items are never dropped; they are queued for a human.
            review_entry = {
                "doc_id": document["id"],
                "error": outcome["error"],
                "partial_data": outcome.get("data"),
                "requires_human_review": True,
            }
            review_queue.append(review_entry)
            continue

        enriched = {**document, "data": outcome["data"]}
        succeeded.append(enriched)

    return {"processed": succeeded, "failed": [], "needs_review": review_queue}

Defining a schema in the system prompt

strict: true for guaranteed schema compliance

Validation retry loop: full implementation

Graceful degradation for persistent failures

Official documentation

Defining a schema in the system prompt

strict: true for guaranteed schema compliance

Validation retry loop: full implementation

Graceful degradation for persistent failures

Official documentation