
Disaggregated Inference for Scalable LLMs


Introduction

LLM inference bottlenecks arise when the prefill phase (prompt processing) and the decode phase (token generation) are coupled on the same workers: prefill is compute-bound while decode is dominated by memory bandwidth, so neither can be provisioned optimally. Disaggregation separates the two phases onto independent worker pools so each can be scaled on its own.
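
To make the split concrete, here is a minimal toy sketch (not any real serving framework's API; all class names and the cache layout are hypothetical) in which a prefill worker processes the whole prompt once and hands off a KV cache, and a decode worker consumes that cache to emit tokens one at a time. In a disaggregated deployment, these two roles run on separate pools, and the KV cache is what moves between them.

```python
# Toy illustration of prefill/decode disaggregation.
# Names (PrefillWorker, DecodeWorker, KVCache) are hypothetical placeholders.
from dataclasses import dataclass, field


@dataclass
class KVCache:
    """Stand-in for the attention key/value cache produced by prefill."""
    prompt_tokens: list[int]
    entries: list[tuple[int, int]] = field(default_factory=list)


class PrefillWorker:
    """Processes the full prompt in one pass and emits a KV cache (compute-bound)."""

    def run(self, prompt_tokens: list[int]) -> KVCache:
        cache = KVCache(prompt_tokens)
        # A real system would run one large batched forward pass here.
        cache.entries = [(t, t) for t in prompt_tokens]
        return cache


class DecodeWorker:
    """Generates tokens one at a time against the KV cache (memory-bound)."""

    def run(self, cache: KVCache, max_new_tokens: int) -> list[int]:
        out: list[int] = []
        last = cache.prompt_tokens[-1]
        for _ in range(max_new_tokens):
            # A real system would run one forward pass per token, appending to the cache.
            nxt = (last + 1) % 50_000
            cache.entries.append((nxt, nxt))
            out.append(nxt)
            last = nxt
        return out


if __name__ == "__main__":
    # Because the two roles are separate objects, they could live in separate
    # processes or machines and be scaled independently.
    cache = PrefillWorker().run(prompt_tokens=[101, 2009, 2003])
    print(DecodeWorker().run(cache, max_new_tokens=4))
```

The design point the sketch highlights is the hand-off: once prefill finishes, everything decode needs is in the KV cache, which is why the two phases can be placed on different hardware as long as that cache can be transferred efficiently.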
