{
  "@context": "https://schema.org",
  "@type": "Dataset",
  "@id": "https://requesty.ai/data/cache-hit-rate-by-provider-april-2026",
  "id": "cache-hit-april-2026",
  "slug": "cache-hit-rate-by-provider-april-2026",
  "title": "Prompt-cache hit rate per provider, April 2026",
  "shortTitle": "Cache hit rate",
  "topic": "latency",
  "abstract": "Which AI providers have the highest prompt-cache hit rate? In April 2026 Anthropic-direct led the Requesty gateway at 77% (cached_tokens / input_tokens), Bedrock Claude was healthy at 57%, and Vertex (Claude) trailed at 24%. Same Claude model family, 3× lower hit rate. Vertex (Gemini) sat at 10% and Mistral at 4%, the floor among major routes.",
  "whyItMatters": "Prompt caching directly cuts the per-request cost of long, repeated context. The difference between a 77% hit rate and a 24% hit rate on the same model family is roughly a 3× reduction in input tokens billed at full price. The Vertex-Claude gap looks like a configuration issue rather than a platform limitation, which means Claude users on Vertex are leaving substantial savings on the table without a code change.",
  "questions": [
    "Which AI providers have the best prompt caching hit rate?",
    "Why is prompt caching so much worse on Vertex Claude than on Anthropic direct?",
    "How much does prompt caching reduce LLM inference cost in production?",
    "Which providers should I avoid if I rely on prompt caching?"
  ],
  "period": "Apr 2026",
  "updated": "2026-05-09",
  "license": "CC BY 4.0",
  "licenseUrl": "https://creativecommons.org/licenses/by/4.0/",
  "caveats": [
    "Moonshot 88% cache-hit reading is a measurement artefact at 6% success rate. Excluded from the leader panel.",
    "cached_tokens semantics differ slightly by provider (which tokens count as \"cached\"). The ratio is meaningful but not strictly apples-to-apples across providers."
  ],
  "keyFindings": [
    "Anthropic-direct: 77% cache hit, the leader by a wide margin.",
    "Bedrock Claude: 57%. OpenAI: 36%. DeepSeek: 48%. Healthy.",
    "Vertex (Claude): 24%. Same model as Anthropic-direct (77%) and Bedrock (57%), 3× lower hit rate. Configuration gap.",
    "Vertex (Gemini): 10%. The floor among major routes.",
    "Mistral: 4%. Roughly the floor; prompt caching is not a meaningful lever on that route today.",
    "Moonshot reports 88% but it is a measurement artefact at 6% success rate; do not quote it."
  ],
  "columns": [
    {
      "key": "provider",
      "label": "Provider",
      "unit": "count"
    },
    {
      "key": "cache_hit_rate",
      "label": "Cache hit rate",
      "unit": "percent"
    }
  ],
  "rows": [
    {
      "provider": "Anthropic",
      "cache_hit_rate": 0.775
    },
    {
      "provider": "Bedrock",
      "cache_hit_rate": 0.569
    },
    {
      "provider": "DeepSeek",
      "cache_hit_rate": 0.483
    },
    {
      "provider": "Azure",
      "cache_hit_rate": 0.41
    },
    {
      "provider": "OpenAI",
      "cache_hit_rate": 0.364
    },
    {
      "provider": "xAI",
      "cache_hit_rate": 0.357
    },
    {
      "provider": "Novita",
      "cache_hit_rate": 0.319
    },
    {
      "provider": "Vertex (Claude)",
      "cache_hit_rate": 0.235
    },
    {
      "provider": "Vertex (Gemini)",
      "cache_hit_rate": 0.096
    },
    {
      "provider": "Mistral",
      "cache_hit_rate": 0.041
    }
  ],
  "rowKey": "provider",
  "citation": {
    "apa": "Requesty (2026). Prompt-cache hit rate per provider, April 2026. Requesty Data. https://requesty.ai/data/cache-hit-rate-by-provider-april-2026",
    "bibtex": "@misc{requesty_cache_hit_rate_by_provider_april_2026,\n  author       = {{Requesty}},\n  title        = {Prompt-cache hit rate per provider, April 2026},\n  year         = {2026},\n  howpublished = {\\url{https://requesty.ai/data/cache-hit-rate-by-provider-april-2026}},\n  note         = {Requesty Data}\n}"
  },
  "permalink": "https://requesty.ai/data/cache-hit-rate-by-provider-april-2026",
  "downloads": {
    "json": "https://requesty.ai/data/cache-hit-rate-by-provider-april-2026/data.json",
    "csv": "https://requesty.ai/data/cache-hit-rate-by-provider-april-2026/data.csv",
    "markdown": "https://requesty.ai/data/cache-hit-rate-by-provider-april-2026.md"
  },
  "citedIn": [
    {
      "title": "What the gateway saw in April 2026",
      "url": "https://requesty.ai/blog/provider-trends-april-2026-agentic-share-latency"
    }
  ],
  "image": "https://requesty.ai/data/cache-hit-rate-by-provider-april-2026/opengraph-image",
  "source": {
    "organization": "Requesty",
    "url": "https://requesty.ai"
  }
}