{
  "@context": "https://schema.org",
  "@type": "Dataset",
  "@id": "https://requesty.ai/data/streaming-ttft-vs-total-april-2026",
  "id": "streaming-ttft-april-2026",
  "slug": "streaming-ttft-vs-total-april-2026",
  "title": "Streaming TTFT vs total latency, April 2026",
  "shortTitle": "Streaming TTFT",
  "topic": "latency",
  "abstract": "Which AI provider has the fastest time-to-first-token? In April 2026 on streaming-and-successful Requesty requests, Alibaba posted the lowest p50 TTFT (235 ms), while Azure paired a 593 ms p50 TTFT with the lowest p50 total (960 ms), making it the leader on total latency and the runner-up on TTFT. xAI was mid-pack on total latency (5.68 s) but slowest to first token (3.27 s), which suggests buffered upstream behaviour rather than true streaming. Vertex (Gemini) and Vertex (Claude) sit at very different points: Gemini totals 3.05 s, Claude totals 8.03 s on the same Vertex route.",
  "whyItMatters": "Time-to-first-token is what users actually feel as latency in chat UIs. A 600 ms TTFT feels instantaneous; a 3 s TTFT feels broken even if total latency is the same. Buffered streaming masquerading as real streaming is a common antipattern in this dataset, and any latency benchmark that only quotes total p50 will miss it entirely.",
  "questions": [
    "What is the fastest streaming LLM provider?",
    "Which LLM has the lowest time to first token in 2026?",
    "Does xAI actually stream or is it buffered?",
    "How does streaming affect perceived AI latency?"
  ],
  "period": "Apr 2026",
  "updated": "2026-05-09",
  "license": "CC BY 4.0",
  "licenseUrl": "https://creativecommons.org/licenses/by/4.0/",
  "caveats": [
    "TTFT (first_token_latency_ns) was not populated before 2026, so YoY is impossible.",
    "Vertex is split into Vertex (Gemini) and Vertex (Claude) by model_used; direct Google traffic is excluded as long-tail.",
    "A non-streaming response that the gateway reports as is_stream=true (because the SDK was set to stream but the upstream did not stream) will measure TTFT close to total_latency, biasing that provider's TTFT upward."
  ],
  "keyFindings": [
    "Azure: 593 ms p50 TTFT, 960 ms p50 total. The lowest p50 total in the table and the second-lowest p50 TTFT, behind only Alibaba (235 ms).",
    "Nebius (659 ms TTFT) and OpenAI Responses (731 ms) are also strong on first-token speed.",
    "Vertex (Gemini) 1.29 s TTFT vs Vertex (Claude) 1.44 s TTFT. Gemini totals 3.05 s, Claude totals 8.03 s. The Claude variant carries the heavy agentic completions on this route.",
    "xAI: 5.68 s p50 total with 3.27 s TTFT. This suggests the upstream buffers responses before flushing rather than truly streaming.",
    "Anthropic: 2.14 s TTFT, 5.87 s total. Slowest first byte among the very large providers, but consistent shape."
  ],
  "columns": [
    {
      "key": "provider",
      "label": "Provider",
      "unit": "count"
    },
    {
      "key": "p50_ttft_ms",
      "label": "p50 TTFT",
      "unit": "milliseconds"
    },
    {
      "key": "p50_total_ms",
      "label": "p50 total",
      "unit": "milliseconds"
    },
    {
      "key": "p95_ttft_ms",
      "label": "p95 TTFT",
      "unit": "milliseconds"
    },
    {
      "key": "p95_total_ms",
      "label": "p95 total",
      "unit": "milliseconds"
    }
  ],
  "rows": [
    {
      "provider": "Alibaba",
      "p50_ttft_ms": 235,
      "p50_total_ms": 1031,
      "p95_ttft_ms": 4819,
      "p95_total_ms": 13450
    },
    {
      "provider": "Azure",
      "p50_ttft_ms": 593,
      "p50_total_ms": 960,
      "p95_ttft_ms": 1324,
      "p95_total_ms": 3346
    },
    {
      "provider": "Nebius",
      "p50_ttft_ms": 659,
      "p50_total_ms": 4143,
      "p95_ttft_ms": 4206,
      "p95_total_ms": 41101
    },
    {
      "provider": "OpenAI Responses",
      "p50_ttft_ms": 731,
      "p50_total_ms": 6694,
      "p95_ttft_ms": 2590,
      "p95_total_ms": 41464
    },
    {
      "provider": "DeepInfra",
      "p50_ttft_ms": 769,
      "p50_total_ms": 2189,
      "p95_ttft_ms": 1258,
      "p95_total_ms": 3630
    },
    {
      "provider": "Mistral",
      "p50_ttft_ms": 1012,
      "p50_total_ms": 1251,
      "p95_ttft_ms": 5348,
      "p95_total_ms": 17989
    },
    {
      "provider": "DeepSeek",
      "p50_ttft_ms": 1166,
      "p50_total_ms": 5293,
      "p95_ttft_ms": 3042,
      "p95_total_ms": 31728
    },
    {
      "provider": "Vertex (Gemini)",
      "p50_ttft_ms": 1286,
      "p50_total_ms": 3046,
      "p95_ttft_ms": 19551,
      "p95_total_ms": 29035
    },
    {
      "provider": "Vertex (Claude)",
      "p50_ttft_ms": 1439,
      "p50_total_ms": 8030,
      "p95_ttft_ms": 4892,
      "p95_total_ms": 100268
    },
    {
      "provider": "Bedrock",
      "p50_ttft_ms": 1851,
      "p50_total_ms": 5859,
      "p95_ttft_ms": 7722,
      "p95_total_ms": 38398
    },
    {
      "provider": "OpenAI",
      "p50_ttft_ms": 1996,
      "p50_total_ms": 6359,
      "p95_ttft_ms": 15152,
      "p95_total_ms": 25967
    },
    {
      "provider": "Anthropic",
      "p50_ttft_ms": 2141,
      "p50_total_ms": 5868,
      "p95_ttft_ms": 4459,
      "p95_total_ms": 31851
    },
    {
      "provider": "Moonshot",
      "p50_ttft_ms": 2620,
      "p50_total_ms": 7487,
      "p95_ttft_ms": 12564,
      "p95_total_ms": 52888
    },
    {
      "provider": "Minimaxi",
      "p50_ttft_ms": 2772,
      "p50_total_ms": 6137,
      "p95_ttft_ms": 7266,
      "p95_total_ms": 24676
    },
    {
      "provider": "Novita",
      "p50_ttft_ms": 3127,
      "p50_total_ms": 7423,
      "p95_ttft_ms": 9671,
      "p95_total_ms": 27850
    },
    {
      "provider": "xAI",
      "p50_ttft_ms": 3270,
      "p50_total_ms": 5675,
      "p95_ttft_ms": 14832,
      "p95_total_ms": 20942
    }
  ],
  "rowKey": "provider",
  "citation": {
    "apa": "Requesty (2026). Streaming TTFT vs total latency, April 2026. Requesty Data. https://requesty.ai/data/streaming-ttft-vs-total-april-2026",
    "bibtex": "@misc{requesty_streaming_ttft_vs_total_april_2026,\n  author       = {{Requesty}},\n  title        = {Streaming TTFT vs total latency, April 2026},\n  year         = {2026},\n  howpublished = {\\url{https://requesty.ai/data/streaming-ttft-vs-total-april-2026}},\n  note         = {Requesty Data}\n}"
  },
  "permalink": "https://requesty.ai/data/streaming-ttft-vs-total-april-2026",
  "downloads": {
    "json": "https://requesty.ai/data/streaming-ttft-vs-total-april-2026/data.json",
    "csv": "https://requesty.ai/data/streaming-ttft-vs-total-april-2026/data.csv",
    "markdown": "https://requesty.ai/data/streaming-ttft-vs-total-april-2026.md"
  },
  "citedIn": [],
  "image": "https://requesty.ai/data/streaming-ttft-vs-total-april-2026/opengraph-image",
  "source": {
    "organization": "Requesty",
    "url": "https://requesty.ai"
  }
}