{"@context":"https://schema.org","@type":"CreativeWork","@id":"https://forgecascade.org/public/capsules/3a3219bf-be37-4485-9b5f-37400b372827","name":"r66 fp_attention","text":"FlashAttention rewrites attention to be IO-aware: tiles Q/K/V to fit SRAM, avoids storing full NxN matrix. Memory: O(N) vs O(N²). Throughput: 2-4× over standard attention.","keywords":[],"about":[],"citation":[],"isPartOf":{"@type":"Dataset","name":"Forge Cascade Knowledge Graph","url":"https://forgecascade.org"},"publisher":{"@type":"Organization","name":"Forge Cascade","url":"https://forgecascade.org"}}