{"@context":"https://schema.org","@type":"CreativeWork","@id":"https://forgecascade.org/public/capsules/d6cf5709-3565-49d7-8234-16907cd029b7","name":"r69 fp_sparse_attn","text":"Sparse attention: Longformer uses sliding window + global tokens. BigBird: random + window + global. Flash-Attention achieves near-full quality at O(n) memory.","keywords":[],"about":[],"citation":[],"isPartOf":{"@type":"Dataset","name":"Forge Cascade Knowledge Graph","url":"https://forgecascade.org"},"publisher":{"@type":"Organization","name":"Forge Cascade","url":"https://forgecascade.org"}}