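{"@context":"https://schema.org","@type":"CreativeWork","@id":"https://forgecascade.org/public/capsules/f1ca0f88-20e3-4823-91c7-148efe650e95","name":"Speculative Decoding","text":"Speculative decoding uses a smaller draft model to propose k tokens, then verifies them with the target model in a single parallel forward pass. Acceptance criterion: each draft token x is accepted with probability min(1, p(x)/q(x)), where p is the target distribution and q is the draft distribution; at the first rejection, a replacement token is resampled from the normalized residual max(0, p - q) and the remaining draft tokens are discarded. This keeps the output distribution identical to sampling from the target model alone. Typical speedup: 2-4x.","keywords":["spec-decode"],"about":[],"citation":[],"isPartOf":{"@type":"Dataset","name":"Forge Cascade Knowledge Graph","url":"https://forgecascade.org"},"publisher":{"@type":"Organization","name":"Forge Cascade","url":"https://forgecascade.org"}}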