{label}

); } function RagBlock({ x, y, w, label, tok, state, o = 1 }) { // state: 'idle'|'fresh'|'cached' const col = state==='cached'?COLORS.green : state==='fresh'?COLORS.coral : COLORS.purple; const tag = state==='cached'?'cached ·×0.1' : state==='fresh'?'fresh ·×1' : null; return (

{label} {tok}t {tag && {tag}}

); } // ════════════════════════════════════════════════════════════════════════════ // SCENE — RETRIEVAL (RAG): THE ANTI-CACHE (length 23s) // ════════════════════════════════════════════════════════════════════════════ const CHUNKS = 5, TOK_PER = 512; function SceneRAG() { const { localTime: lt } = useSprite(); const setup = ramp(lt, 0.2, 1.0); const colX = 760, colW = 380, sysY = 230, chunkY0 = 312, ch = 60, cgap = 12; const mx = 1560, my = 470; const dbXY = [320, 470]; // retrieve: chunks fan in 1.6→4.4 const chunkAt = (i) => 1.8 + i*0.42; const nChunks = Array.from({length:CHUNKS}).filter((_,i)=> lt > chunkAt(i)).length; const flowRetrieve = lt > 1.6 && lt < 4.6; // phases const showCount = pulse(lt, 4.8, 23, 0.4); const bustPhase = lt > 8.6; // cache-bust reveal const showCost = pulse(lt, 13.4, 23, 0.4); const sysState = bustPhase ? 'cached' : 'idle'; const chunkState = bustPhase ? 'fresh' : 'idle'; const totalChunkTok = nChunks * TOK_PER; const perQueryCost = (CHUNKS*TOK_PER)/1e6 * PRICE.input; // fresh price const chunkMidY = (i) => chunkY0 + i*(ch+cgap) + ch/2; return ( <> {/* query */}

query · "refund policy?"

{/* vector DB */} {/* retrieval wires DB -> each chunk */} {Array.from({length:nChunks}).map((_,i)=>( ))} {/* context column: system prompt (stable) + retrieved chunks */}

the prompt sent to the model

{Array.from({length:nChunks}).map((_,i)=>( ))} {/* model */} {nChunks>=CHUNKS && } {/* per-query token counter */} {showCount > 0.01 && (

injected every query

+{fmtNum(totalChunkTok)}

{CHUNKS} chunks × {TOK_PER} tokens

)} {/* cache-bust callout */} {bustPhase && (

chunks change every query → cache miss

only the stable system prompt keeps the discount

)} {/* cost line */} {showCost > 0.01 && (

{fmtNum(CHUNKS*TOK_PER)} × $3/M = {fmtUSD(perQueryCost)}

per query — on every call, at full fresh price

)} RAG retrieves the top-K chunks from a vector DB and injects them into the prompt. Five chunks of ~512 tokens = 2,560 fresh input tokens added to every query. And they're query-specific — so unlike the system prompt, they rarely hit the cache. That's full-price input tokens, paid on every single call. The levers: retrieve fewer, smaller chunks, rerank hard, and cache hot documents. ); } Object.assign(window, { SceneRAG, VectorDB, RagBlock });