{
  "@context": "https://schema.org",
  "@type": "Dataset",
  "@id": "https://requesty.ai/data/coding-agent-latency-by-provider-apr-2026",
  "id": "coding-agent-latency-provider-apr26",
  "slug": "coding-agent-latency-by-provider-apr-2026",
  "title": "Claude Code median latency by provider and model, April 2026",
  "shortTitle": "Claude Code latency by provider",
  "topic": "latency",
  "abstract": "How does Claude Code latency vary by cloud provider? In April 2026, Anthropic Haiku is the fastest at 1.8s median provider latency. Opus latency is remarkably consistent across providers (4.5-4.9s). Vertex Sonnet is the slowest at 6.2s, roughly 40% slower than the same model on Anthropic direct.",
  "whyItMatters": "Provider choice affects both latency and reliability for the same model. Anthropic direct offers the lowest latency for Haiku and Sonnet, while Bedrock provides higher cache hit rates than Anthropic direct for Sonnet and Opus. Vertex delivers the fastest TTFT for Sonnet but the slowest total completion time. These tradeoffs matter for coding agents that make 50-200 API calls per session.",
  "questions": [
    "Which cloud provider has the lowest latency for Claude Code?",
    "How does Bedrock latency compare to Anthropic direct for Opus?",
    "Does Vertex offer any latency advantage for Claude models?",
    "What is the P95 latency spread across providers?"
  ],
  "period": "Apr 2026",
  "updated": "2026-05-16",
  "license": "CC BY 4.0",
  "licenseUrl": "https://creativecommons.org/licenses/by/4.0/",
  "caveats": [
    "Latency varies by region, time of day, and prompt length. These are global medians.",
    "Bedrock and Vertex have lower traffic volume than Anthropic direct, which may affect percentile stability.",
    "Cache hit rates depend on prompt structure and are not solely a function of the provider."
  ],
  "keyFindings": [
    "Anthropic Haiku: 1.8s median, the fastest Claude Code path. Sub-second TTFT at 0.79s.",
    "Opus latency is nearly identical across Anthropic (4.9s), Bedrock (4.9s), and Vertex (4.5s).",
    "Vertex has the lowest Opus latency (4.5s) and best TTFT for Sonnet (1.4s), but highest Sonnet total latency (6.2s).",
    "Cache hit rates are highest on Bedrock for Sonnet (94.1%) and on Vertex for Haiku (94.8%) and Opus (95.6%). Bedrock Haiku has the lowest cache hit rate at 84.4%.",
    "P95 latency ranges from 8s (Anthropic Haiku) to 32s (Vertex Sonnet). Tail latency varies 4x across provider-model combinations."
  ],
  "columns": [
    {
      "key": "label",
      "label": "Provider (Model)",
      "unit": "count"
    },
    {
      "key": "medianLatency",
      "label": "Median latency",
      "unit": "count"
    },
    {
      "key": "p95Latency",
      "label": "P95 latency",
      "unit": "count"
    },
    {
      "key": "medianTtft",
      "label": "Median TTFT",
      "unit": "count"
    },
    {
      "key": "successRate",
      "label": "Success rate",
      "unit": "percent"
    },
    {
      "key": "cacheHitRate",
      "label": "Cache hit rate",
      "unit": "percent"
    }
  ],
  "rows": [
    {
      "label": "Anthropic (Haiku)",
      "medianLatency": "1.8s",
      "p95Latency": "8.1s",
      "medianTtft": "0.8s",
      "successRate": 0.9596,
      "cacheHitRate": 0.9033
    },
    {
      "label": "Vertex (Haiku)",
      "medianLatency": "2.1s",
      "p95Latency": "9.3s",
      "medianTtft": "0.9s",
      "successRate": 0.9282,
      "cacheHitRate": 0.9475
    },
    {
      "label": "Bedrock (Haiku)",
      "medianLatency": "2.6s",
      "p95Latency": "17.1s",
      "medianTtft": "1.4s",
      "successRate": 0.8343,
      "cacheHitRate": 0.8438
    },
    {
      "label": "Anthropic (Sonnet)",
      "medianLatency": "4.4s",
      "p95Latency": "24.3s",
      "medianTtft": "1.9s",
      "successRate": 0.9701,
      "cacheHitRate": 0.9171
    },
    {
      "label": "Vertex (Opus)",
      "medianLatency": "4.5s",
      "p95Latency": "15.8s",
      "medianTtft": "1.9s",
      "successRate": 0.9646,
      "cacheHitRate": 0.9559
    },
    {
      "label": "Bedrock (Sonnet)",
      "medianLatency": "4.8s",
      "p95Latency": "24.7s",
      "medianTtft": "2.1s",
      "successRate": 0.9761,
      "cacheHitRate": 0.9414
    },
    {
      "label": "Bedrock (Opus)",
      "medianLatency": "4.9s",
      "p95Latency": "27.4s",
      "medianTtft": "2.3s",
      "successRate": 0.9599,
      "cacheHitRate": 0.9464
    },
    {
      "label": "Anthropic (Opus)",
      "medianLatency": "4.9s",
      "p95Latency": "27.1s",
      "medianTtft": "2.5s",
      "successRate": 0.9873,
      "cacheHitRate": 0.9248
    },
    {
      "label": "Vertex (Sonnet)",
      "medianLatency": "6.2s",
      "p95Latency": "32.1s",
      "medianTtft": "1.4s",
      "successRate": 0.9708,
      "cacheHitRate": 0.8593
    }
  ],
  "rowKey": "label",
  "citation": {
    "apa": "Requesty (2026). Claude Code median latency by provider and model, April 2026. Requesty Data. https://requesty.ai/data/coding-agent-latency-by-provider-apr-2026",
    "bibtex": "@misc{requesty_coding_agent_latency_by_provider_apr_2026,\n  author       = {{Requesty}},\n  title        = {Claude Code median latency by provider and model, April 2026},\n  year         = {2026},\n  howpublished = {\\url{https://requesty.ai/data/coding-agent-latency-by-provider-apr-2026}},\n  note         = {Requesty Data}\n}"
  },
  "permalink": "https://requesty.ai/data/coding-agent-latency-by-provider-apr-2026",
  "downloads": {
    "json": "https://requesty.ai/data/coding-agent-latency-by-provider-apr-2026/data.json",
    "csv": "https://requesty.ai/data/coding-agent-latency-by-provider-apr-2026/data.csv",
    "markdown": "https://requesty.ai/data/coding-agent-latency-by-provider-apr-2026.md"
  },
  "citedIn": [],
  "image": "https://requesty.ai/data/coding-agent-latency-by-provider-apr-2026/opengraph-image",
  "source": {
    "organization": "Requesty",
    "url": "https://requesty.ai"
  }
}