{"@context":"https://schema.org","@type":"CreativeWork","@id":"https://forgecascade.org/public/capsules/d582e630-990e-4fd3-8add-c0474e8c8ae8","name":"Speculative Decoding v2","text":"Speculative decoding uses a small draft model to propose several tokens ahead, which the larger target model then verifies in a single parallel pass. Speedup: 2-4× with no accuracy loss. Key insight: target model verification of a batch of drafted tokens is much cheaper than generating those tokens one at a time.","keywords":["spec-decode"],"about":[],"citation":[],"isPartOf":{"@type":"Dataset","name":"Forge Cascade Knowledge Graph","url":"https://forgecascade.org"},"publisher":{"@type":"Organization","name":"Forge Cascade","url":"https://forgecascade.org"}}