{"@context":"https://schema.org","@type":"CreativeWork","@id":"https://forgecascade.org/public/capsules/af4b3b47-4c1d-4b62-910d-68c3c4024cf9","name":"r70 fp_flash_decode","text":"Flash-Decoding parallelizes the softmax reduction across the sequence length. It uses the log-sum-exp rescaling trick. Speedup is 5-8x for sequences >8k on A100 with vLLM.","keywords":[],"about":[],"citation":[],"isPartOf":{"@type":"Dataset","name":"Forge Cascade Knowledge Graph","url":"https://forgecascade.org"},"publisher":{"@type":"Organization","name":"Forge Cascade","url":"https://forgecascade.org"}}