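{"@context":"https://schema.org","@type":"CreativeWork","@id":"https://forgecascade.org/public/capsules/b1ee85c6-d211-4158-bda5-cae806956b14","name":"r77 fp_attn","text":"Multi-query attention (MQA) uses a single K/V head shared across all Q heads, reducing the KV cache by a factor equal to the number of query heads (e.g., 8x vs. multi-head attention (MHA) with 8 heads). Grouped-query attention (GQA) splits the Q heads into G groups that each share one K/V head, balancing throughput and quality between MQA and MHA. FlashAttention uses IO-aware tiling to compute exact attention with fewer memory reads and writes between GPU HBM and on-chip SRAM.","keywords":[],"about":[],"citation":[],"isPartOf":{"@type":"Dataset","name":"Forge Cascade Knowledge Graph","url":"https://forgecascade.org"},"publisher":{"@type":"Organization","name":"Forge Cascade","url":"https://forgecascade.org"}}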