<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:georss="http://www.georss.org/georss">
<channel>
<title>speculative rag - Servistopauto リップル</title>
<link>https://servistopauto.ru/</link>
<description>speculative rag - Servistopauto</description>
<language>ru</language><item>
<title>Speculative decoding reimagined for multimodal large language models (97) 사진</title>
<link>https://servistopauto.ru/speculative-rag/3176-Speculative-decoding-reimagined-for-multimodal-large-language-models-97-sajin.html</link>
<pdalink>https://servistopauto.ru/speculative-rag/3176-Speculative-decoding-reimagined-for-multimodal-large-language-models-97-sajin.html</pdalink>
<guid isPermaLink="false">3176</guid>
<pubDate>Thu, 19 Feb 2026 20:32:27 +0300</pubDate>
<category>native-yes</category>

<enclosure url="https://www.marktechpost.com/wp-content/uploads/2024/03/Screenshot-2024-03-24-at-4.56.58-PM.png" type="image/png" />
<enclosure url="https://cdn-uploads.huggingface.co/production/uploads/65c2710dc79c1a6e4d22734d/kB7OTGVsC1DPMIV2femsf.png" type="image/png" />
<enclosure url="https://pbs.twimg.com/media/GpU4hNbWIAAIp-g.jpg" type="image/jpeg" />
<enclosure url="https://adpgradshow.com/project-images/R_40VMbPGgBCCQ1rj_1.png" type="image/png" />
<enclosure url="https://www.marktechpost.com/wp-content/uploads/2026/01/NVIDIA-MOBILE-3-scaled.png" type="image/png" />
<enclosure url="https://publicationsncte.org/docserver/fulltext/rte/57/3/RTE_Volume_57_Issue_3-cover-image.png" type="image/png" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/massv-multimodal-adaptation-and-self-data-distillation-for-speculative-decoding-of-vision-language-models-2.png" type="image/png" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!1cbA!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F526aaca6-8647-4928-ab5e-d569ed6e156a_1310x736.jpeg" type="image/jpeg" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!_N43!,w_1200,h_675,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3ef0effa-4cb2-4aac-bc6b-acee2d7e270e_2816x1536.png" type="image/png" />
<enclosure url="https://miro.medium.com/v2/resize:fit:1358/format:webp/1*6cw2qpstgvDj-IadfvQK1g.png" type="image/png" />
<enclosure url="https://pbs.twimg.com/media/G-Bo6uyXcAAsyg4.png" type="image/png" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!0Kiy!,w_1200,h_675,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feac9806f-2af0-44f4-86fe-fea03bc690ce_2816x1536.png" type="image/png" />
<enclosure url="https://pbs.twimg.com/media/Gv8Kr1BWYAA5BXe.jpg" type="image/jpeg" />
<enclosure url="https://i1.rgstatic.net/publication/389392226_From_Hours_to_Minutes_Lossless_Acceleration_of_Ultra_Long_Sequence_Generation_up_to_100K_Tokens/links/67c07fc8461fb56424ec0c13/largepreview.png" type="image/png" />
<enclosure url="https://www.preprints.org/frontend/picture/ms_xml/manuscript/e9a781ac7e8f0b59a3b6f70e7407654c/preprints-181250-g010.png" type="image/png" />
<enclosure url="https://www.sec.gov/Archives/edgar/data/1601830/000160183025000035/rxrx-20241231_g10.jpg" type="image/jpeg" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!ptwv!,w_1200,h_675,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9e84e21f-bca4-4e49-a22c-195c92903356_1536x1024.png" type="image/png" />
<enclosure url="https://diplo-media.s3.eu-central-1.amazonaws.com/2025/01/DeepSeek-comparison.jpeg" type="image/jpeg" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!GXro!,w_1200,h_675,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5214a17f-e2bd-4450-a9f5-a9b018ede22b_2816x1536.png" type="image/png" />
<enclosure url="https://www.preprints.org/frontend/picture/ms_xml/manuscript/e9a781ac7e8f0b59a3b6f70e7407654c/preprints-181250-g006.png" type="image/png" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!zJKT!,w_1200,h_675,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F161d69a2-6c8c-4330-8176-dd64f5781b02_2816x1536.png" type="image/png" />
<enclosure url="https://www.sec.gov/Archives/edgar/data/1601830/000160183025000035/rxrx-20241231_g37.jpg" type="image/jpeg" />
<enclosure url="https://i1.rgstatic.net/publication/395944262_Teaching_AI_to_Feel_A_Collaborative_Full-Body_Exploration_of_Emotive_Communication/links/68da0a59ffdca73694b42104/largepreview.png" type="image/png" />
<enclosure url="https://www.sec.gov/Archives/edgar/data/1601830/000160183025000035/rxrx-20241231_g27.jpg" type="image/jpeg" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/flash-latent-aware-semi-autoregressive-speculative-decoding-for-multimodal-tasks-4.png" type="image/png" />
<enclosure url="https://i.ytimg.com/vi/17cPQ3rgRdI/maxresdefault.jpg" type="image/jpeg" />
<enclosure url="https://www.marktechpost.com/wp-content/uploads/2024/01/Blog-Banner-2.png" type="image/png" />
<enclosure url="https://www.preprints.org/frontend/picture/ms_xml/manuscript/e9a781ac7e8f0b59a3b6f70e7407654c/preprints-181250-g018.png" type="image/png" />
<enclosure url="https://www.sundeepteki.org/uploads/3/8/2/4/38242873/gemini-blog-sections-copy_orig.png" type="image/png" />
<enclosure url="https://pbs.twimg.com/media/Gm_YzmFWsAA9CzH.jpg" type="image/jpeg" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/steering-multimodal-large-language-models-decoding-for-context-aware-safety-1.png" type="image/png" />
<enclosure url="https://www.sec.gov/Archives/edgar/data/1601830/000160183025000035/rxrx-20241231_g26.jpg" type="image/jpeg" />
<enclosure url="https://journals.sagepub.com/cms/10.1177/13548565231155076/asset/5ffe310a-5866-483d-b321-26914142029d/assets/images/large/10.1177_13548565231155076-fig6.jpg" type="image/jpeg" />
<enclosure url="https://arxiv.org/html/2412.16553v1/x1.png" type="image/png" />
<enclosure url="https://www.marktechpost.com/wp-content/uploads/2023/12/Screenshot-2023-12-25-at-11.56.19-PM.png" type="image/png" />
<enclosure url="https://developer.download.nvidia.com/images/tensorrt/inference-tech-blog-sa-external-think-smart-1920x1080.png" type="image/png" />
<enclosure url="https://i1.rgstatic.net/publication/397956516_Dialogues_of_Sense_and_Algorithm_Reconfiguring_Arts-Based_Research_in_the_AI_Era/links/692dbf2c1a621a227cf7075a/largepreview.png" type="image/png" />
<enclosure url="https://developer.download.nvidia.com/images/pretrained-ai-models/rtx-ai-garage-3-steps-20b.png" type="image/png" />
<enclosure url="https://www.sundeepteki.org/uploads/3/8/2/4/38242873/zapier-ai-fluency_orig.jpeg" type="image/jpeg" />
<enclosure url="https://ila.onlinelibrary.wiley.com/cms/asset/4e16ace7-3c22-48f4-b13a-a5e4bad16cb8/rrq591-fig-0017-m.jpg" type="image/jpeg" />
<enclosure url="https://www.sec.gov/Archives/edgar/data/1601830/000160183025000035/rxrx-20241231_g49.jpg" type="image/jpeg" />
<enclosure url="https://ila.onlinelibrary.wiley.com/cms/asset/e6a118ea-bc67-47ca-b6b8-667fb04804c4/rrq591-gra-0001-m.jpg" type="image/jpeg" />
<enclosure url="https://towardsdatascience.com/wp-content/uploads/2026/01/image-107.jpg" type="image/jpeg" />
<enclosure url="https://www.sundeepteki.org/uploads/3/8/2/4/38242873/applied-ml-downloads_orig.png" type="image/png" />
<enclosure url="https://www.sundeepteki.org/uploads/3/8/2/4/38242873/training-genaichallenges_orig.png" type="image/png" />
<enclosure url="https://arxiv.org/html/2406.09416v1/x2.png" type="image/png" />
<enclosure url="http://www.jmis.org/journal/jmis/jmis-10-4/gif/jmis-10-4-301-g1.gif" type="image/gif" />
<enclosure url="https://www.sec.gov/Archives/edgar/data/1601830/000160183025000035/rxrx-20241231_g20.jpg" type="image/jpeg" />
<enclosure url="https://miro.medium.com/1*hw250paSDdIV7cnYT412JQ.png" type="image/png" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!y-Au!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c98a2dd-612e-4e73-8ad5-41532c87d7d3_1081x760.png" type="image/png" />
<enclosure url="https://arxiv.org/html/2503.06508v1/x2.png" type="image/png" />
<enclosure url="https://www.mdpi.com/computers/computers-14-00015/article_deploy/html/images/computers-14-00015-g002.png" type="image/png" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/flash-latent-aware-semi-autoregressive-speculative-decoding-for-multimodal-tasks-3.png" type="image/png" />
<enclosure url="https://pbs.twimg.com/media/Gk5Clh3WIAAFetb.jpg" type="image/jpeg" />
<enclosure url="https://www.mdpi.com/land/land-14-01647/article_deploy/html/images/land-14-01647-g007.png" type="image/png" />
<enclosure url="https://www.preprints.org/frontend/picture/ms_xml/manuscript/e9a781ac7e8f0b59a3b6f70e7407654c/preprints-181250-g015.png" type="image/png" />
<enclosure url="https://diplo-media.s3.eu-central-1.amazonaws.com/2025/01/Gartner-curve-1.jpg" type="image/jpeg" />
<enclosure url="https://pbs.twimg.com/media/GssPpJWWMAA9HF4.jpg" type="image/jpeg" />
<enclosure url="https://www.marktechpost.com/wp-content/uploads/2024/03/Screenshot-2024-03-20-at-2.23.16-PM.png" type="image/png" />
<enclosure url="https://pbs.twimg.com/media/GtRIpN6XQAAzXKe.jpg" type="image/jpeg" />
<enclosure url="https://i1.rgstatic.net/publication/346535459_Decoding_individual_identity_from_brain_activity_elicited_in_imagining_common_experiences/links/5fc670f445851568d131f728/largepreview.png" type="image/png" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/specvlm-fast-speculative-decoding-in-vision-language-models-4.png" type="image/png" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!KuBX!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F209bca56-73ee-4d15-ad36-38ba4bfb7ce9_1173x672.jpeg" type="image/jpeg" />
<enclosure url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhR0ibv4V7WK2ZM5BY8wWiJfrXKy8VeJd_C2qUR1wzQ4zdsQWV4dLfyTDsuPM-Jpa6Ttf3bcAIwCkQIfP52CewieedaqT493z4D7eoRqwO_xJWxOFLyzonnTfv9rfZAqtpaJGZ8MFqF4Pyfj1eENtTuy_TNPAuYCoqiHBZoNKXPIl94kDEDjxGY87ayUPP4/s1792/DALL%C2%B7E%202025-01-20%2015.33.57%20-%20A%20minimalistic,%20clean,%20and%20futuristic%20landscape%20in%20the%20Climate%20Kybernetik%20Signal%20style,%20with%20organic%20and%20data%20patterns%20subtly%20merging.%20Large%20bold%20text.webp" type="image/webp" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/hivis-hiding-visual-tokens-from-the-drafter-for-speculative-decoding-in-vision-language-models-3.png" type="image/png" />
<enclosure url="https://journals.sagepub.com/cms/10.1177/13548565231155076/asset/50b51d94-c69b-48ff-b38e-f71051acfd3d/assets/images/large/10.1177_13548565231155076-fig5.jpg" type="image/jpeg" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!sptb!,w_1200,h_675,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3960e4a-2bec-47e5-a1ec-3192c684bdb5_2816x1536.png" type="image/png" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/speculative-decoding-reimagined-for-multimodal-large-language-models-4.png" type="image/png" />
<content:encoded><![CDATA[<p><img src="https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=10164113735743126" alt="Energy-based transformer models for improved reasoning"></p> <p><img src="https://www.marktechpost.com/wp-content/uploads/2024/03/Screenshot-2024-03-24-at-4.56.58-PM.png" alt="Apple Researchers Propose a Multimodal AI Approach to Device-Directed Speech Detection with Large Language Models - MarkTechPost"></p> <p><img src="https://cdn-uploads.huggingface.co/production/uploads/65c2710dc79c1a6e4d22734d/kB7OTGVsC1DPMIV2femsf.png" alt="Daily Papers - Hugging Face"></p> <p><img src="https://pbs.twimg.com/media/GpU4hNbWIAAIp-g.jpg" alt="Yuchen Zeng (@yzeng58) / Posts / X"></p> <p><img src="https://adpgradshow.com/project-images/R_40VMbPGgBCCQ1rj_1.png" alt="USYD Architecture, Design and Planning Graduate Exhibition 2025"></p> <p><img src="x-raw-image:///891bbffbddd64ab19d82be9f867a2bad6a5df8fc994e1772b72743e90f7a3d4e" alt="HiViS: Hiding Visual Tokens from the Drafter for Speculative Decoding in Vision-Language Models"></p> <p><img src="https://www.marktechpost.com/wp-content/uploads/2026/01/NVIDIA-MOBILE-3-scaled.png" alt="This AI Paper Unveils the Potential of Speculative Decoding for Faster Large Language Model Inference: A Comprehensive Analysis - MarkTechPost"></p> <p><img src="https://publicationsncte.org/docserver/fulltext/rte/57/3/RTE_Volume_57_Issue_3-cover-image.png" alt="Annotated Bibliography of Research in the Teaching of English | ncte.org"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/massv-multimodal-adaptation-and-self-data-distillation-for-speculative-decoding-of-vision-language-models-2.png" alt="Literature Review] MASSV: Multimodal Adaptation and Self-Data Distillation for Speculative Decoding of Vision-Language Models"></p> <p><img 
src="https://substackcdn.com/image/fetch/$s_!1cbA!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F526aaca6-8647-4928-ab5e-d569ed6e156a_1310x736.jpeg" alt="2024 Backward Pass: The Definitive Guide to AI in 2024"></p> <p><img src="https://substackcdn.com/image/fetch/$s_!_N43!,w_1200,h_675,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3ef0effa-4cb2-4aac-bc6b-acee2d7e270e_2816x1536.png" alt="Very ML | State-of-the-art Machine Learning News Feed | Infomate"></p> <p><img src="https://miro.medium.com/v2/resize:fit:1358/format:webp/1*6cw2qpstgvDj-IadfvQK1g.png" alt="Speculative Decoding: A technique that makes LLMs faster without sacrificing quality | by Sujith K. Surendran | Medium"></p> <p><img src="https://pbs.twimg.com/media/G-Bo6uyXcAAsyg4.png" alt="Yuchen Zeng (@yzeng58) / Posts / X"></p> <p><img src="https://substackcdn.com/image/fetch/$s_!0Kiy!,w_1200,h_675,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feac9806f-2af0-44f4-86fe-fea03bc690ce_2816x1536.png" alt="Very ML | State-of-the-art Machine Learning News Feed | Infomate"></p> <p><img src="https://pbs.twimg.com/media/Gv8Kr1BWYAA5BXe.jpg" alt="Yuchen Zeng (@yzeng58) / Posts / X"></p> <p><img src="https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=122163381746563025" alt="Energy-based transformer models for improved reasoning"></p> <p><img src="https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=3266884513449603" alt="Energy-based transformer models for improved reasoning"></p> <p><img src="https://miro.medium.com/v2/resize:fit:1400/0*MZE673ev1fO8-smK" alt="FX Sentiment, Reimagined: How Large Language Models Are Transforming Currency Markets | by Martin Bauer | Medium"></p> <p><img 
src="https://i1.rgstatic.net/publication/389392226_From_Hours_to_Minutes_Lossless_Acceleration_of_Ultra_Long_Sequence_Generation_up_to_100K_Tokens/links/67c07fc8461fb56424ec0c13/largepreview.png" alt="PDF) From Hours to Minutes: Lossless Acceleration of Ultra Long Sequence Generation up to 100K Tokens"></p> <p><img src="https://www.preprints.org/frontend/picture/ms_xml/manuscript/e9a781ac7e8f0b59a3b6f70e7407654c/preprints-181250-g010.png" alt="LLMs4All: A Review of Large Language Models Across Academic Disciplines[v1] | Preprints.org"></p> <p><img src="https://www.sec.gov/Archives/edgar/data/1601830/000160183025000035/rxrx-20241231_g10.jpg" alt="rxrx-20241231"></p> <p><img src="https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=24928345463432677" alt="Energy-based transformer models for improved reasoning"></p> <p><img src="https://media.licdn.com/dms/image/v2/D5605AQGSYu8p2QwuCw/videocover-high/B56ZrFXD9.HYBU-/0/1764247750297?e=2147483647&v=beta&t=OnlbCrTtNJBzbB93yS_GLRkrGwWY7boG5C7nVge0HWQ" alt="Anjul S. 
- AI/ML Platform Owner | Agentic AI, Deep Learning, LLMs, Industrial AI/ML, IIoT, Digital Twins | Data & MLOps for Semiconductor Fab and Smart Manufacturing | LinkedIn"></p> <p><img src="https://substackcdn.com/image/fetch/$s_!ptwv!,w_1200,h_675,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9e84e21f-bca4-4e49-a22c-195c92903356_1536x1024.png" alt="Very ML | State-of-the-art Machine Learning News Feed | Infomate"></p> <p><img src="https://imgv2-1-f.scribdassets.com/img/document/86976727/original/4f43902517/1?v=1" alt="Collaborative Approaches To The Digital in English Studies | PDF | Cognitive Science | Epistemology"></p> <p><img src="https://diplo-media.s3.eu-central-1.amazonaws.com/2025/01/DeepSeek-comparison.jpeg" alt="Revisiting 10 AI and digital forecasts for 2025: Predictions and Reality - Diplo"></p> <p><img src="https://substackcdn.com/image/fetch/$s_!GXro!,w_1200,h_675,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5214a17f-e2bd-4450-a9f5-a9b018ede22b_2816x1536.png" alt="Very ML | State-of-the-art Machine Learning News Feed | Infomate"></p> <p><img src="https://www.preprints.org/frontend/picture/ms_xml/manuscript/e9a781ac7e8f0b59a3b6f70e7407654c/preprints-181250-g006.png" alt="LLMs4All: A Review of Large Language Models Across Academic Disciplines[v1] | Preprints.org"></p> <p><img src="https://imgv2-2-f.scribdassets.com/img/document/742051659/original/079d1d52ac/1?v=1" alt="Schools Reimagined (Jacqueline Grennon Brooks, Martin G. 
Brooks) (Z-Library) | PDF | Constructivism (Philosophy Of Education) | Curriculum"></p> <p><img src="x-raw-image:///59f6ce52b22dd4368717362297494dc48ba714ae1f5788792a93f6bcb207221c" alt="CONFERENCE PROGRAM"></p> <p><img src="https://substackcdn.com/image/fetch/$s_!zJKT!,w_1200,h_675,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F161d69a2-6c8c-4330-8176-dd64f5781b02_2816x1536.png" alt="Very ML | State-of-the-art Machine Learning News Feed | Infomate"></p> <p><img src="https://www.sec.gov/Archives/edgar/data/1601830/000160183025000035/rxrx-20241231_g37.jpg" alt="rxrx-20241231"></p> <p><img src="https://i1.rgstatic.net/publication/395944262_Teaching_AI_to_Feel_A_Collaborative_Full-Body_Exploration_of_Emotive_Communication/links/68da0a59ffdca73694b42104/largepreview.png" alt="PDF) Teaching AI to Feel: A Collaborative, Full-Body Exploration of Emotive Communication"></p> <p><img src="https://www.sec.gov/Archives/edgar/data/1601830/000160183025000035/rxrx-20241231_g27.jpg" alt="rxrx-20241231"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/flash-latent-aware-semi-autoregressive-speculative-decoding-for-multimodal-tasks-4.png" alt="Literature Review] FLASH: Latent-Aware Semi-Autoregressive Speculative Decoding for Multimodal Tasks"></p> <p><img src="https://i.ytimg.com/vi/17cPQ3rgRdI/maxresdefault.jpg" alt="Universe of Incredible Models"></p> <p><img src="https://beehiiv-images-production.s3.amazonaws.com/uploads/asset/file/1a5a1443-b471-4708-9905-79188e526908/Frame_268.png?t=1733176956" alt="🌁#78: Enabling the Future of AI (2025)"></p> <p><img src="x-raw-image:///032ac1fabe4c519db236c6705aed4f3ef475498201097b45da3346de0f58259e" alt="Copy of Ctrl+S Conference Programme"></p> <p><img src="https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=4003428849928838" alt="Energy-based transformer models for improved reasoning"></p> <p><img 
src="https://beehiiv-images-production.s3.amazonaws.com/uploads/asset/file/a051612d-e252-441b-8d3b-37143abdd984/Frame_323.png?t=1764023757" alt="Universe of Incredible Models"></p> <p><img src="https://www.marktechpost.com/wp-content/uploads/2024/01/Blog-Banner-2.png" alt="JPMorgan AI Research Introduces DocGraphLM: An Innovative AI Framework Merging Pre-Trained Language Models and Graph Semantics for Enhanced Document Representation in Information Extraction and QA - MarkTechPost"></p> <p><img src="https://www.preprints.org/frontend/picture/ms_xml/manuscript/e9a781ac7e8f0b59a3b6f70e7407654c/preprints-181250-g018.png" alt="LLMs4All: A Review of Large Language Models Across Academic Disciplines[v1] | Preprints.org"></p> <p><img src="https://www.sundeepteki.org/uploads/3/8/2/4/38242873/gemini-blog-sections-copy_orig.png" alt="Sundeep Teki - AI Blog | Insights on GenAI, Career, ML Systems"></p> <p><img src="https://pbs.twimg.com/media/Gm_YzmFWsAA9CzH.jpg" alt="Yuchen Zeng (@yzeng58) / Posts / X"></p> <p><img src="x-raw-image:///6af19c4a04c6b2761b451db9e28858227b8735116a6852cc19a11f886629eecb" alt="18th edition – 2025 tech trends report"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/steering-multimodal-large-language-models-decoding-for-context-aware-safety-1.png" alt="논문 리뷰] Steering Multimodal Large Language Models Decoding for Context-Aware Safety"></p> <p><img src="https://media.theresanaiforthat.com/chatgpt.png?height=768" alt="ChatGPT 5.2 - AI Tool For ChatGPT"></p> <p><img src="https://www.sec.gov/Archives/edgar/data/1601830/000160183025000035/rxrx-20241231_g26.jpg" alt="rxrx-20241231"></p> <p><img src="https://lookaside.instagram.com/seo/google_widget/crawler/?media_id=3807110552385358525" alt="A new project I hope to start working on soon."></p> <p><img src="https://journals.sagepub.com/cms/10.1177/13548565231155076/asset/5ffe310a-5866-483d-b321-26914142029d/assets/images/large/10.1177_13548565231155076-fig6.jpg" 
alt="Spray without politics? Contrasting street-based perceptions and computer vision framings of graffitied Rome - Helton Levy, Eleonora Diamanti, 2023"></p> <p><img src="https://arxiv.org/html/2412.16553v1/x1.png" alt="Papers by Zhanpeng Zeng"></p> <p><img src="https://www.marktechpost.com/wp-content/uploads/2023/12/Screenshot-2023-12-25-at-11.56.19-PM.png" alt="Researchers from the University of Washington and Allen Institute for AI Introduce Time Vectors: A Simple Tool to Customize Language Models to New Time Periods - MarkTechPost"></p> <p><img src="x-raw-image:///598a317199fdc14e0aed6bd44aef954a8055bf222cb65001169327d57efdae5a" alt="Findings of the Association for Computational Linguistics: ACL 2024"></p> <p><img src="https://lookaside.instagram.com/seo/google_widget/crawler/?media_id=3146444080708230125" alt="Bookshop by Uro | This book is a theoretical backdrop for architects as much as it is for businesspeople and employees. With curiosity and skepticism, it... | Instagram"></p> <p><img src="https://developer.download.nvidia.com/images/tensorrt/inference-tech-blog-sa-external-think-smart-1920x1080.png" alt="AI Models | NVIDIA Developer"></p> <p><img src="https://i1.rgstatic.net/publication/397956516_Dialogues_of_Sense_and_Algorithm_Reconfiguring_Arts-Based_Research_in_the_AI_Era/links/692dbf2c1a621a227cf7075a/largepreview.png" alt="PDF) Dialogues of Sense and Algorithm: Reconfiguring Arts-Based Research in the AI Era"></p> <p><img src="https://lh7-us.googleusercontent.com/fNgMN4fgY_0S1ayBE30bsYtKf_Ogex0RnWcVuH_zOOo8VqICS5pZNalBPNeavm18ccxPY8HsVA_BlX2rMx4IoeJoYOoMMK4jqnqUKmTNErXYTjFZc-pDC0v0YaNVKEduUvRAy72gaw9jdABlI3kgWg0" alt="This AI Paper Unveils the Potential of Speculative Decoding for Faster Large Language Model Inference: A Comprehensive Analysis - MarkTechPost"></p> <p><img src="https://developer.download.nvidia.com/images/pretrained-ai-models/rtx-ai-garage-3-steps-20b.png" alt="AI Models | NVIDIA Developer"></p> <p><img 
src="https://www.sundeepteki.org/uploads/3/8/2/4/38242873/zapier-ai-fluency_orig.jpeg" alt="Sundeep Teki - AI Blog | Insights on GenAI, Career, ML Systems"></p> <p><img src="https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=8509764762384007" alt="Energy-based transformer models for improved reasoning"></p> <p><img src="https://ila.onlinelibrary.wiley.com/cms/asset/4e16ace7-3c22-48f4-b13a-a5e4bad16cb8/rrq591-fig-0017-m.jpg" alt="Literacy in the Time of Artificial Intelligence - Kalantzis - 2025 - Reading Research Quarterly - Wiley Online Library"></p> <p><img src="https://www.sec.gov/Archives/edgar/data/1601830/000160183025000035/rxrx-20241231_g49.jpg" alt="rxrx-20241231"></p> <p><img src="https://ila.onlinelibrary.wiley.com/cms/asset/e6a118ea-bc67-47ca-b6b8-667fb04804c4/rrq591-gra-0001-m.jpg" alt="Literacy in the Time of Artificial Intelligence - Kalantzis - 2025 - Reading Research Quarterly - Wiley Online Library"></p> <p><img src="https://towardsdatascience.com/wp-content/uploads/2026/01/image-107.jpg" alt="Very ML | State-of-the-art Machine Learning News Feed | Infomate"></p> <p><img src="https://www.sundeepteki.org/uploads/3/8/2/4/38242873/applied-ml-downloads_orig.png" alt="Sundeep Teki - AI Blog | Insights on GenAI, Career, ML Systems"></p> <p><img src="https://media.licdn.com/dms/image/v2/D5622AQEXr5eem_RUHg/feedshare-shrink_800/B56Zd63aSpHoAg-/0/1750113038782?e=2147483647&v=beta&t=MxU9vBaVj80-NHWEd-_oEDBL57mkWREpD5Lfj2NBmEo" alt="Top LinkedIn Content on Data-Driven Strategy Formulation"></p> <p><img src="https://www.sundeepteki.org/uploads/3/8/2/4/38242873/training-genaichallenges_orig.png" alt="Sundeep Teki - AI Blog | Insights on GenAI, Career, ML Systems"></p> <p><img src="x-raw-image:///b0752b97da1d5d253bc8eb25e02464095e6da7992f8a6847b074fb95695f1ab6" alt="Findings of the Association for Computational Linguistics: ACL 2024"></p> <p><img src="https://arxiv.org/html/2406.09416v1/x2.png" alt="Papers by Zhanpeng Zeng"></p> <p><img 
src="http://www.jmis.org/journal/jmis/jmis-10-4/gif/jmis-10-4-301-g1.gif" alt="JMIS(Journal of Multimedia Information System)"></p> <p><img src="https://www.sec.gov/Archives/edgar/data/1601830/000160183025000035/rxrx-20241231_g20.jpg" alt="rxrx-20241231"></p> <p><img src="https://miro.medium.com/1*hw250paSDdIV7cnYT412JQ.png" alt="Speculative Decoding: A technique that makes LLMs faster without sacrificing quality | by Sujith K. Surendran | Medium"></p> <p><img src="https://substackcdn.com/image/fetch/$s_!y-Au!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c98a2dd-612e-4e73-8ad5-41532c87d7d3_1081x760.png" alt="2024 Backward Pass: The Definitive Guide to AI in 2024"></p> <p><img src="https://arxiv.org/html/2503.06508v1/x2.png" alt="Papers by Zhanpeng Zeng"></p> <p><img src="https://www.mdpi.com/computers/computers-14-00015/article_deploy/html/images/computers-14-00015-g002.png" alt="A Comprehensive Exploration of 6G Wireless Communication Technologies"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/flash-latent-aware-semi-autoregressive-speculative-decoding-for-multimodal-tasks-3.png" alt="Literature Review] FLASH: Latent-Aware Semi-Autoregressive Speculative Decoding for Multimodal Tasks"></p> <p><img src="https://pbs.twimg.com/media/Gk5Clh3WIAAFetb.jpg" alt="Yuchen Zeng (@yzeng58) / Posts / X"></p> <p><img src="https://www.mdpi.com/land/land-14-01647/article_deploy/html/images/land-14-01647-g007.png" alt="Urban Land Use and Value in the Digital Economy: A Scoping Review of Disrupted Activities, Behaviours, and Mobility"></p> <p><img src="https://www.preprints.org/frontend/picture/ms_xml/manuscript/e9a781ac7e8f0b59a3b6f70e7407654c/preprints-181250-g015.png" alt="LLMs4All: A Review of Large Language Models Across Academic Disciplines[v1] | Preprints.org"></p> <p><img src="https://diplo-media.s3.eu-central-1.amazonaws.com/2025/01/Gartner-curve-1.jpg" 
alt="Revisiting 10 AI and digital forecasts for 2025: Predictions and Reality - Diplo"></p> <p><img src="https://pbs.twimg.com/media/GssPpJWWMAA9HF4.jpg" alt="Yuchen Zeng (@yzeng58) / Posts / X"></p> <p><img src="https://www.marktechpost.com/wp-content/uploads/2024/03/Screenshot-2024-03-20-at-2.23.16-PM.png" alt="This AI Paper from KAIST AI Unveils ORPO: Elevating Preference Alignment in Language Models to New Heights - MarkTechPost"></p> <p><img src="https://pbs.twimg.com/media/GtRIpN6XQAAzXKe.jpg" alt="Yuchen Zeng (@yzeng58) / Posts / X"></p> <p><img src="x-raw-image:///167572aad64cbd1681e1366868be0eaa00ecbd6e80dd84a2c0b2a6a926c24e98" alt="Senza titolo"></p> <p><img src="https://i1.rgstatic.net/publication/346535459_Decoding_individual_identity_from_brain_activity_elicited_in_imagining_common_experiences/links/5fc670f445851568d131f728/largepreview.png" alt="PDF) Decoding individual identity from brain activity elicited in imagining common experiences"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/specvlm-fast-speculative-decoding-in-vision-language-models-4.png" alt="Literature Review] SpecVLM: Fast Speculative Decoding in Vision-Language Models"></p> <p><img src="x-raw-image:///f740e776c8ef6d710dbefa734b626d33552e5310e2009bd0d894283364f2d95e" alt="Senza titolo"></p> <p><img src="https://substackcdn.com/image/fetch/$s_!KuBX!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F209bca56-73ee-4d15-ad36-38ba4bfb7ce9_1173x672.jpeg" alt="2024 Backward Pass: The Definitive Guide to AI in 2024"></p> <p><img src="https://media.licdn.com/dms/image/v2/D4E22AQFJtficFZVFxw/feedshare-shrink_800/B4EZXqYju_H0Ag-/0/1743394071121?e=2147483647&v=beta&t=DRTnqS7byD1R7uBfRRVF9-7nRe830y_CANYK4J0p1OQ" alt="Latest Techniques in LLM Development"></p> <p><img 
src="https://media.licdn.com/dms/image/v2/D4D12AQH73vMwyMTd7Q/article-cover_image-shrink_720_1280/B4DZm51waaH4AI-/0/1759759505027?e=2147483647&v=beta&t=ngVabWGVssNkMDBvgZrV-55UO2ZJ3wIEdkGkSTkCZN0" alt="Ragas: Open-source framework for RAG pipeline evaluation | Abdul Rehman Azam posted on the topic | LinkedIn"></p> <p><img src="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhR0ibv4V7WK2ZM5BY8wWiJfrXKy8VeJd_C2qUR1wzQ4zdsQWV4dLfyTDsuPM-Jpa6Ttf3bcAIwCkQIfP52CewieedaqT493z4D7eoRqwO_xJWxOFLyzonnTfv9rfZAqtpaJGZ8MFqF4Pyfj1eENtTuy_TNPAuYCoqiHBZoNKXPIl94kDEDjxGY87ayUPP4/s1792/DALL%C2%B7E%202025-01-20%2015.33.57%20-%20A%20minimalistic,%20clean,%20and%20futuristic%20landscape%20in%20the%20Climate%20Kybernetik%20Signal%20style,%20with%20organic%20and%20data%20patterns%20subtly%20merging.%20Large%20bold%20text.webp" alt="The Definitive Primer on Artificial Intelligence and the Rise of ASI"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/hivis-hiding-visual-tokens-from-the-drafter-for-speculative-decoding-in-vision-language-models-3.png" alt="论文评述] HiViS: Hiding Visual Tokens from the Drafter for Speculative Decoding in Vision-Language Models"></p> <p><img src="https://journals.sagepub.com/cms/10.1177/13548565231155076/asset/50b51d94-c69b-48ff-b38e-f71051acfd3d/assets/images/large/10.1177_13548565231155076-fig5.jpg" alt="Spray without politics? 
Contrasting street-based perceptions and computer vision framings of graffitied Rome - Helton Levy, Eleonora Diamanti, 2023"></p> <p><img src="https://substackcdn.com/image/fetch/$s_!sptb!,w_1200,h_675,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3960e4a-2bec-47e5-a1ec-3192c684bdb5_2816x1536.png" alt="Very ML | State-of-the-art Machine Learning News Feed | Infomate"></p> <p><img src="x-raw-image:///4be4856af0ff83324db9aed835c0f0b33077165fd2b628b83b229bc7333c5835" alt="HiViS: Hiding Visual Tokens from the Drafter for Speculative Decoding in Vision-Language Models"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/speculative-decoding-reimagined-for-multimodal-large-language-models-4.png" alt="Literature Review] Speculative Decoding Reimagined for Multimodal Large Language Models"></p>]]></content:encoded>
</item><item>
<title>Speculative decoding 原理 (97) 사진</title>
<link>https://servistopauto.ru/speculative-rag/3177-Speculative-decoding-Yuan-Li-97-sajin.html</link>
<pdalink>https://servistopauto.ru/speculative-rag/3177-Speculative-decoding-Yuan-Li-97-sajin.html</pdalink>
<guid>3177</guid>
<pubDate>Thu, 19 Feb 2026 20:32:27 +0300</pubDate>
<category>native-yes</category>

<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/revisiting-judge-decoding-from-first-principles-via-training-free-distributional-divergence-2.png" type="image/png" />
<enclosure url="https://oss-emcsprod-public.modb.pro/image/auto/modb_20240923_f32e51f6-7980-11ef-8d3c-fa163eb4f6be.png" type="image/png" />
<enclosure url="https://developer.nvidia.cn/zh-cn/blog/wp-content/uploads/2025/09/speculative-decoding-verification-phase-target-model.gif" type="image/gif" />
<enclosure url="https://img2024.cnblogs.com/blog/1850883/202505/1850883-20250505075617474-488999040.jpg" type="image/jpeg" />
<enclosure url="https://developer.nvidia.cn/zh-cn/blog/wp-content/uploads/2025/09/speculative-decoding-draft-target-approach.gif" type="image/gif" />
<enclosure url="https://pic3.zhimg.com/v2-a7e0cc8fee987124da5ef0e2aeeed134_1440w.jpg" type="image/jpeg" />
<enclosure url="https://datahonor.com/blog/images/llm_sps/full_code.png" type="image/png" />
<enclosure url="https://blog.vllm.ai/assets/figures/spec-decode/figure3.png" type="image/png" />
<enclosure url="http://www.linsight.cn/f5c015c/fi_example.png" type="image/png" />
<enclosure url="https://baoyu.io/images/llm/how-to-make-llms-go-fast/qkv-matrix4.png" type="image/png" />
<enclosure url="https://img2024.cnblogs.com/blog/1850883/202504/1850883-20250420184030185-723663890.jpg" type="image/jpeg" />
<enclosure url="https://developer.qcloudimg.com/http-save/yehe-1472475/d61493ecd7a17a6cbf45b651fb9e31b6.png" type="image/png" />
<enclosure url="https://awps-assets.meituan.net/mit-x/blog-images-bundle-2017/16ffd18b.png" type="image/png" />
<enclosure url="https://developer.nvidia.cn/zh-cn/blog/wp-content/uploads/2025/09/speculative-decoding-eagle-drafting-mechanism.gif" type="image/gif" />
<enclosure url="https://s3.51cto.com/oss/202309/01/48e5c5154a668a5a1ce8823c704c470ad80569.png" type="image/png" />
<enclosure url="https://blog.vllm.ai/assets/figures/spec-decode/figure10.png" type="image/png" />
<enclosure url="https://orion-rsrc.hyper.ai/media/2025/05/ap8nlqd4.png" type="image/png" />
<enclosure url="https://static.mianbaoban-assets.eet-china.com/xinyu-images/MBXY-CR-5aa74dd7ff6a3eab818dc2ae6e0bbd54.png" type="image/png" />
<enclosure url="http://neurowave.tech/images/img/img_deepseek-23.png" type="image/png" />
<enclosure url="https://i.ytimg.com/vi/hm7VEgxhOvk/maxresdefault.jpg" type="image/jpeg" />
<enclosure url="https://qingkeai.online/upload/%E6%88%AA%E5%B1%8F2026-01-07%2021.57.42.png" type="image/png" />
<enclosure url="https://img2024.cnblogs.com/blog/1850883/202504/1850883-20250420183747998-1842667327.jpg" type="image/jpeg" />
<enclosure url="https://img.zhaoweiguo.com/uPic/2024/10/8tHkPG.png" type="image/png" />
<enclosure url="https://picx.zhimg.com/v2-81ec2decf95e5c5793e7a46d660deb39_1440w.jpg" type="image/jpeg" />
<enclosure url="https://s2.51cto.com/oss/202508/04/f9c477054778b691a4e4127744030b5e839608.png" type="image/png" />
<enclosure url="http://www.linsight.cn/f5c015c/fi_alpha.png" type="image/png" />
<enclosure url="https://simg.baai.ac.cn/hub-detail/ca493f33cdcd41d02c0276c0a27745291741660201405.webp" type="image/webp" />
<enclosure url="https://nvlabs.github.io/GDPO/static/images/gdpo_toy.png" type="image/png" />
<enclosure url="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/layerskip-assets/Llama-2-70B.png" type="image/png" />
<enclosure url="https://www.issoh.co.jp/wp/wp-content/uploads/AdobeStock_188318503.jpeg" type="image/jpeg" />
<enclosure url="https://img2024.cnblogs.com/blog/1850883/202504/1850883-20250420184329891-1790909747.jpg" type="image/jpeg" />
<enclosure url="https://files.mdnice.com/pic/17bd1f43-f4fb-4409-9f2f-0f2dc237ef5a.jpg" type="image/jpeg" />
<enclosure url="https://ai-bot.cn/wp-content/uploads/2025/09/FastMTP-website.png" type="image/png" />
<enclosure url="https://datahonor.com/blog/images/llm_sps/reject.png" type="image/png" />
<enclosure url="https://www.xebook.net/wp-content/uploads/2025/02/speculative-decoding.jpg" type="image/jpeg" />
<enclosure url="https://i.ytimg.com/vi/MAbGgsWKrg8/maxresdefault.jpg" type="image/jpeg" />
<enclosure url="https://developer.nvidia.cn/zh-cn/blog/wp-content/uploads/2025/10/Jeston-Thor-7x-png.webp" type="image/webp" />
<enclosure url="https://pic2.zhimg.com/v2-e0c585a4b4f533868549f64e8de72d69_1440w.jpg" type="image/jpeg" />
<enclosure url="https://static.mianbaoban-assets.eet-china.com/xinyu-images/MBXY-CR-01df6cb1223c7c365aea51f9a222d78c.png" type="image/png" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/respec-towards-optimizing-speculative-decoding-in-reinforcement-learning-systems-1.png" type="image/png" />
<enclosure url="https://pic1.zhimg.com/v2-4ad39476532d7080d05aadb3068c1368_1440w.gif" type="image/gif" />
<enclosure url="https://i-blog.csdnimg.cn/img_convert/b5f90b5cd13519f57fc2b5bafba6f290.png" type="image/png" />
<enclosure url="https://img2024.cnblogs.com/blog/1850883/202504/1850883-20250420184059028-853276524.jpg" type="image/jpeg" />
<enclosure url="https://img-blog.csdnimg.cn/direct/5075effc73064b5daa77a971cb9cdee1.png" type="image/png" />
<enclosure url="http://www.linsight.cn/f5c015c/fi_choose_gamma.png" type="image/png" />
<enclosure url="https://resouces.modelscope.cn/paper-cover-images/2512/20573/cover.jpeg" type="image/jpeg" />
<enclosure url="http://www.linsight.cn/f5c015c/fi_walltime.png" type="image/png" />
<enclosure url="http://www.linsight.cn/f5c015c/fi_expected_token_num.png" type="image/png" />
<enclosure url="https://pic1.zhimg.com/v2-5b3a5b12765ab248f5361d4a9b927276_1440w.jpg" type="image/jpeg" />
<enclosure url="http://www.linsight.cn/f5c015c/acce_alog.png" type="image/png" />
<enclosure url="https://pic4.zhimg.com/v2-20b398df5cb95ad0f813fafd34eaa205_1440w.jpg" type="image/jpeg" />
<enclosure url="https://awps-assets.meituan.net/mit-x/blog-images-bundle-2017/1c9f3009.png" type="image/png" />
<enclosure url="http://neurowave.tech/images/img/img_deepseek-3.png" type="image/png" />
<enclosure url="https://img2024.cnblogs.com/blog/1850883/202504/1850883-20250420183840259-2070650832.jpg" type="image/jpeg" />
<enclosure url="https://image.woshipm.com/2025/06/10/c235ab7a-4569-11f0-ad57-00163e09d72f.png" type="image/png" />
<enclosure url="https://s2.51cto.com/oss/202508/04/123252e0978af302beb829208896a107bc1280.png" type="image/png" />
<enclosure url="https://i-blog.csdnimg.cn/direct/9a4f25a272f2410bafcae3d973e9b34d.png" type="image/png" />
<enclosure url="https://www8.zhizaozhe.com/public/uploads/picture/20250501/4fe1f3a02e72eb6f0a79fc2b301a052e.png" type="image/png" />
<enclosure url="https://substackcdn.com/image/fetch/w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb97a8ac7-db97-497f-866d-10400729d51e_1248x764.png" type="image/png" />
<enclosure url="https://developer.qcloudimg.com/http-save/yehe-1424957/374288cdceee1286e4fa6811ee7f480e.png" type="image/png" />
<enclosure url="https://substackcdn.com/image/fetch/w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6a623a4-fdbc-4abf-883b-3c2679b4ad4d_1460x640.png" type="image/png" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/lantern-accelerating-visual-autoregressive-models-with-relaxed-speculative-decoding-1.png" type="image/png" />
<enclosure url="https://substackcdn.com/image/fetch/w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facc49abf-bc55-45fd-9697-99c9434087d0_864x916.png" type="image/png" />
<enclosure url="https://jamchang.com/notes/speculative-decoding.webp" type="image/webp" />
<enclosure url="https://image.woshipm.com/2025/06/10/c3120200-4569-11f0-ad57-00163e09d72f.png" type="image/png" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/fast-dllm-training-free-acceleration-of-diffusion-llm-by-enabling-kv-cache-and-parallel-decoding-1.png" type="image/png" />
<enclosure url="https://blog.vllm.ai/assets/figures/spec-decode/figure6.png" type="image/png" />
<enclosure url="https://img2024.cnblogs.com/blog/1850883/202504/1850883-20250420184310910-434932906.jpg" type="image/jpeg" />
<enclosure url="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/layerskip-assets/Llama-2-13B.png" type="image/png" />
<content:encoded><![CDATA[<p><img src="https://p3-xtjj-sign.byteimg.com/tos-cn-i-73owjymdk6/59d8cd318d61402fa5d4ccfa1ee928dc~tplv-73owjymdk6-jj-mark-v1:0:0:0:0:5o6Y6YeR5oqA5pyv56S-5Yy6IEAg5ouG5oi_6ICB5paZ:q75.awebp?rk3s=f64ab15b&x-expires=1769882006&x-signature=kv4FDFSGcesJOvUtVF81Ufmfy7o%3D" alt="Speculative Decoding 推测解码方案详解本文系统介绍了从早期草稿模型方法、Prompt Lookup - 掘金"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/revisiting-judge-decoding-from-first-principles-via-training-free-distributional-divergence-2.png" alt="论文评述] Revisiting Judge Decoding from First Principles via Training-Free Distributional Divergence"></p> <p><img src="https://oss-emcsprod-public.modb.pro/image/auto/modb_20240923_f32e51f6-7980-11ef-8d3c-fa163eb4f6be.png" alt="大型语言模型推理详解- 墨天轮"></p> <p><img src="https://developer.nvidia.cn/zh-cn/blog/wp-content/uploads/2025/09/speculative-decoding-verification-phase-target-model.gif" alt="用于降低AI 推理延迟的预测性解码简介- NVIDIA 技术博客"></p> <p><img src="https://img2024.cnblogs.com/blog/1850883/202505/1850883-20250505075617474-488999040.jpg" alt="探秘Transformer系列之（32）--- Lookahead Decoding - 罗西的思考- 博客园"></p> <p><img src="https://developer.nvidia.cn/zh-cn/blog/wp-content/uploads/2025/09/speculative-decoding-draft-target-approach.gif" alt="用于降低AI 推理延迟的预测性解码简介- NVIDIA 技术博客"></p> <p><img src="https://pic3.zhimg.com/v2-a7e0cc8fee987124da5ef0e2aeeed134_1440w.jpg" alt="手撕LLM-Speculative Decoding】大模型迈向"并行"解码时代- 知乎"></p> <p><img src="https://datahonor.com/blog/images/llm_sps/full_code.png" alt="LLM Speculative Sampling - Data Honor"></p> <p><img src="https://blog.vllm.ai/assets/figures/spec-decode/figure3.png" alt="How Speculative Decoding Boosts vLLM Performance by up to 2.8x | vLLM Blog"></p> <p><img src="http://www.linsight.cn/f5c015c/fi_example.png" alt="大模型推理加速-投机解码| Linsight"></p> <p><img 
src="https://www.daiwk.net/~gitbook/image?url=https%3A%2F%2F1725978874-files.gitbook.io%2F%7E%2Ffiles%2Fv0%2Fb%2Fgitbook-x-prod.appspot.com%2Fo%2Fspaces%252F-MAwALUwRCP16VZ2I1KP%252Fuploads%252Fgit-blob-1633a127ac27996b7ad5622568659645c0478645%252Fspeculative-decoding-verify.png%3Falt%3Dmedia&width=768&dpr=3&quality=100&sign=c15b0346&sv=2" alt="1.3.llm_archs | collections"></p> <p><img src="https://baoyu.io/images/llm/how-to-make-llms-go-fast/qkv-matrix4.png" alt="如何加速大语言模型的运行[译] | 宝玉的分享"></p> <p><img src="https://img2024.cnblogs.com/blog/1850883/202504/1850883-20250420184030185-723663890.jpg" alt="探秘Transformer系列之（30）--- 投机解码- 罗西的思考- 博客园"></p> <p><img src="https://developer.qcloudimg.com/http-save/yehe-1472475/d61493ecd7a17a6cbf45b651fb9e31b6.png" alt="自然语言生成中的解码方法汇总-腾讯云开发者社区-腾讯云"></p> <p><img src="https://awps-assets.meituan.net/mit-x/blog-images-bundle-2017/16ffd18b.png" alt="纠删码存储系统中的投机性部分写技术- 美团技术团队"></p> <p><img src="https://developer.nvidia.cn/zh-cn/blog/wp-content/uploads/2025/09/speculative-decoding-eagle-drafting-mechanism.gif" alt="用于降低AI 推理延迟的预测性解码简介- NVIDIA 技术博客"></p> <p><img src="https://api.ibos.cn/v4/weapparticle/accesswximg?aid=85256&url=aHR0cHM6Ly9tbWJpei5xcGljLmNuL3N6X21tYml6X3BuZy96aFZsd2o5NnRUaWF1cldaalo4cmxSQnFYUWxKMWxYV1JJU1Z6RzBmeEY3aEdGZnVDUmlhZHlXQTNZZG5xRmliVXFtVEJSbEl5Rk9zSXJ5WVRrVkJ6SEQ1Zy82NDA/d3hfZm10PXBuZw==" alt="万字综述10+ 种LLM 投机采样推理加速方案- 53AI-AI知识库|企业AI知识库|大模型知识库|AIHub"></p> <p><img src="https://s3.51cto.com/oss/202309/01/48e5c5154a668a5a1ce8823c704c470ad80569.png" alt="苹果芯跑大模型不用降计算精度，投机采样杀疯了，GPT-4也在用-51CTO.COM"></p> <p><img src="https://blog.vllm.ai/assets/figures/spec-decode/figure10.png" alt="How Speculative Decoding Boosts vLLM Performance by up to 2.8x | vLLM Blog"></p> <p><img src="https://orion-rsrc.hyper.ai/media/2025/05/ap8nlqd4.png" alt="vLLM 实战教程汇总，从环境配置到大模型部署，中文文档追踪重磅更新| 资讯| HyperAI超神经"></p> <p><img 
src="https://p3-xtjj-sign.byteimg.com/tos-cn-i-73owjymdk6/121295b9c61f47828053eba48e4789f5~tplv-73owjymdk6-jj-mark-v1:0:0:0:0:5o6Y6YeR5oqA5pyv56S-5Yy6IEAgU2U3ZW4yNTg=:q75.awebp?rk3s=f64ab15b&x-expires=1769400721&x-signature=wt5Kqr761HwbUsvS3Rx%2BJGHhamk%3D" alt="Speculative Decoding 推测解码方案详解本文系统介绍了从早期草稿模型方法、Prompt Lookup - 掘金"></p> <p><img src="https://static.mianbaoban-assets.eet-china.com/xinyu-images/MBXY-CR-5aa74dd7ff6a3eab818dc2ae6e0bbd54.png" alt="Cursor 内部工作原理是什么？-电子工程专辑"></p> <p><img src="http://neurowave.tech/images/img/img_deepseek-23.png" alt="8. DeepSeek-V3（V2）详读4（架构+ MTP） - Neurowave"></p> <p><img src="https://i.ytimg.com/vi/hm7VEgxhOvk/maxresdefault.jpg" alt="Speculative Decoding Explained"></p> <p><img src="https://qingkeai.online/upload/%E6%88%AA%E5%B1%8F2026-01-07%2021.57.42.png" alt="比EAGLE-3 快2.5 倍、Qwen3 推理加速6.17 倍！DFlash 如何利用扩散模型终结自回归瓶颈？"></p> <p><img src="https://p3-xtjj-sign.byteimg.com/tos-cn-i-73owjymdk6/44232dbb33b34982b442327f6c3a601d~tplv-73owjymdk6-jj-mark-v1:0:0:0:0:5o6Y6YeR5oqA5pyv56S-5Yy6IEAgU2U3ZW4yNTg=:q75.awebp?rk3s=f64ab15b&x-expires=1769400721&x-signature=7IddCeyyB9xoSp%2F%2BjSth14ytGts%3D" alt="Speculative Decoding 推测解码方案详解本文系统介绍了从早期草稿模型方法、Prompt Lookup - 掘金"></p> <p><img src="https://i0.wp.com/novita-blog.s3.ap-southeast-1.amazonaws.com/will-speculative-decoding-harm-llm-inference-accuracy-QQ_1724643517500.png?resize=1280%2C1812&ssl=1" alt="推测性解码会造成危害吗LLM 推理准确性？ - Novita"></p> <p><img src="https://aijishu.com/img/bVb09o" alt="大语言模型推理性能优化综述- 极术社区- 连接开发者与智能计算生态"></p> <p><img src="https://img2024.cnblogs.com/blog/1850883/202504/1850883-20250420183747998-1842667327.jpg" alt="探秘Transformer系列之（30）--- 投机解码- 罗西的思考- 博客园"></p> <p><img src="x-raw-image:///2f2dea9563b1af67c89ef12cf56ea07010ffba37c47fa4b2abdc9118bf6eac19" alt="PowerPoint 演示文稿"></p> <p><img src="https://img.zhaoweiguo.com/uPic/2024/10/8tHkPG.png" alt="6.3.6. 
Transformers 4.45.2 — 新溪-gordon V2025.02 文档"></p> <p><img src="https://api.ibos.cn/v4/weapparticle/accesswximg?aid=85256&url=aHR0cHM6Ly9tbWJpei5xcGljLmNuL3N6X21tYml6X3BuZy96aFZsd2o5NnRUaWF1cldaalo4cmxSQnFYUWxKMWxYV1JDcnY2QkxpYXpIeUZLR1hyTDI4dXplUDRJNWMwOGc1emdrZFV0R0o1Z2ZweEhsVzFwVGliQ2g5Zy82NDA/d3hfZm10PXBuZw==" alt="万字综述10+ 种LLM 投机采样推理加速方案- 53AI-AI知识库|企业AI知识库|大模型知识库|AIHub"></p> <p><img src="https://picx.zhimg.com/v2-81ec2decf95e5c5793e7a46d660deb39_1440w.jpg" alt="Speculative Decoding 推测解码方案详解- 知乎"></p> <p><img src="https://s2.51cto.com/oss/202508/04/f9c477054778b691a4e4127744030b5e839608.png" alt="2 万字总结：全面梳理大模型Inference 相关技术-AI.x-AIGC专属社区-51CTO.COM"></p> <p><img src="http://www.linsight.cn/f5c015c/fi_alpha.png" alt="大模型推理加速-投机解码| Linsight"></p> <p><img src="https://simg.baai.ac.cn/hub-detail/ca493f33cdcd41d02c0276c0a27745291741660201405.webp" alt="LayerSkip: 使用自推测解码加速大模型推理- 智源社区"></p> <p><img src="https://aijishu.com/img/bVb9s3" alt="LLM性能优化]聊聊长文本推理性能优化方向- 极术社区- 连接开发者与智能计算生态"></p> <p><img src="https://www.rwkv.cn/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Frwkv-7-architecture.f80b7f09.jpg&w=3840&q=75" alt="RWKV 架构及历史- RWKV 中国"></p> <p><img src="https://nvlabs.github.io/GDPO/static/images/gdpo_toy.png" alt="比EAGLE-3 快2.5 倍、Qwen3 推理加速6.17 倍！DFlash 如何利用扩散模型终结自回归瓶颈？"></p> <p><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/layerskip-assets/Llama-2-70B.png" alt="LayerSkip：使用自推测解码加速大模型推理"></p> <p><img src="https://www.issoh.co.jp/wp/wp-content/uploads/AdobeStock_188318503.jpeg" alt="Speculative Decoding（投機的デコーディング）とは何かを徹底解説 | 株式会社一創"></p> <p><img src="https://img2024.cnblogs.com/blog/1850883/202504/1850883-20250420184329891-1790909747.jpg" alt="探秘Transformer系列之（30）--- 投机解码- 罗西的思考- 博客园"></p> <p><img src="https://files.mdnice.com/pic/17bd1f43-f4fb-4409-9f2f-0f2dc237ef5a.jpg" alt="dpsk r1训练细节- mdnice 墨滴"></p> <p><img src="https://ai-bot.cn/wp-content/uploads/2025/09/FastMTP-website.png" alt="FastMTP - 腾讯开源的大语言模型推理加速技术| 
AI工具集"></p> <p><img src="https://datahonor.com/blog/images/llm_sps/reject.png" alt="LLM Speculative Sampling - Data Honor"></p> <p><img src="https://www.xebook.net/wp-content/uploads/2025/02/speculative-decoding.jpg" alt="什么是推测性解码？"></p> <p><img src="https://i0.wp.com/novita-blog.s3.ap-southeast-1.amazonaws.com/will-speculative-decoding-harm-llm-inference-accuracy-QQ_1724643429008.png?resize=1280%2C1652&ssl=1" alt="推测性解码会造成危害吗LLM 推理准确性？ - Novita"></p> <p><img src="https://i.ytimg.com/vi/MAbGgsWKrg8/maxresdefault.jpg" alt="【生成式AI導論 2024】第16講：可以加速所有語言模型生成速度的神奇外掛 — Speculative Decoding"></p> <p><img src="https://developer.nvidia.cn/zh-cn/blog/wp-content/uploads/2025/10/Jeston-Thor-7x-png.webp" alt="通过NVIDIA Jetson AGX Thor 实现7 倍生成式AI 性能，解锁更快速、更智能的边缘模型- NVIDIA 技术博客"></p> <p><img src="https://pic2.zhimg.com/v2-e0c585a4b4f533868549f64e8de72d69_1440w.jpg" alt="手撕LLM-Speculative Decoding】大模型迈向"并行"解码时代- 知乎"></p> <p><img src="https://static.mianbaoban-assets.eet-china.com/xinyu-images/MBXY-CR-01df6cb1223c7c365aea51f9a222d78c.png" alt="Cursor 内部工作原理是什么？-电子工程专辑"></p> <p><img src="https://i0.wp.com/novita-blog.s3.ap-southeast-1.amazonaws.com/will-speculative-decoding-harm-llm-inference-accuracy-QQ_1724643834718.png?resize=1278%2C1838&ssl=1" alt="推测性解码会造成危害吗LLM 推理准确性？ - Novita"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/respec-towards-optimizing-speculative-decoding-in-reinforcement-learning-systems-1.png" alt="论文评述] ReSpec: Towards Optimizing Speculative Decoding in Reinforcement Learning Systems"></p> <p><img src="https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=10164596735367160" alt="愛好AI Engineer 電子報第19 期出刊囉～ 🚀 * **Speculative Decoding**：這是一種加速大型語言模型（LLM）生成內容的技術，通過預測tokens 的方式讓模型可以並行預測多個詞，從而大幅提高處理速度。其原理是給定部分預測"></p> <p><img src="https://pic1.zhimg.com/v2-4ad39476532d7080d05aadb3068c1368_1440w.gif" alt="Speculative Decoding 推测解码方案详解- 知乎"></p> <p><img 
src="https://i-blog.csdnimg.cn/img_convert/b5f90b5cd13519f57fc2b5bafba6f290.png" alt="Speculative Decoding 推测解码方案详解-CSDN博客"></p> <p><img src="https://img2024.cnblogs.com/blog/1850883/202504/1850883-20250420184059028-853276524.jpg" alt="探秘Transformer系列之（30）--- 投机解码- 罗西的思考- 博客园"></p> <p><img src="https://i0.wp.com/novita-blog.s3.ap-southeast-1.amazonaws.com/will-speculative-decoding-harm-llm-inference-accuracy-QQ_1724643474009.png?resize=1280%2C1729&ssl=1" alt="推测性解码会造成危害吗LLM 推理准确性？ - Novita"></p> <p><img src="https://cdn.zhuanzhi.ai/vfiles/ffcfde0748c9151b719a7849e2dc35f9!/format/webp/quality/5" alt="大型语言模型加速生成技术》最新综述- 专知VIP"></p> <p><img src="https://img-blog.csdnimg.cn/direct/5075effc73064b5daa77a971cb9cdee1.png" alt="ChatGLM大模型推理加速之Speculative Decoding-CSDN博客"></p> <p><img src="https://api.ibos.cn/v4/weapparticle/accesswximg?aid=85256&url=aHR0cHM6Ly9tbWJpei5xcGljLmNuL3N6X21tYml6X3BuZy96aFZsd2o5NnRUaWF1cldaalo4cmxSQnFYUWxKMWxYV1JZS01TMVp4WVhVc3dpYlBuajlucklJbjBSQW9QT0NMV29BVDdWaWFuTkpJNnM5UDE3QTcwdm8yZy82NDA/d3hfZm10PXBuZw==" alt="万字综述10+ 种LLM 投机采样推理加速方案- 53AI-AI知识库|企业AI知识库|大模型知识库|AIHub"></p> <p><img src="https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=250745244743853" alt="清楚有趣的說明·加速LLM 推理2~3 倍的技巧：Speculative Decoding 🤓"></p> <p><img src="http://www.linsight.cn/f5c015c/fi_choose_gamma.png" alt="大模型推理加速-投机解码| Linsight"></p> <p><img src="https://resouces.modelscope.cn/paper-cover-images/2512/20573/cover.jpeg" alt="论文详情"></p> <p><img src="http://www.linsight.cn/f5c015c/fi_walltime.png" alt="大模型推理加速-投机解码| Linsight"></p> <p><img src="http://www.linsight.cn/f5c015c/fi_expected_token_num.png" alt="大模型推理加速-投机解码| Linsight"></p> <p><img src="https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=10235380112585864" alt="太神奇了！ OpenAI 這幾天釋出一個預測輸出（predicted output）功能，可以大幅增加我們的token 生成的速度！（飛快，是飛快！） （尤其我們的提示詞只是要將某個內容的部分內容被取代時，用它會有飛快的效果！ 不難想，就有點像是其他都不需要改變，已經快取起來了 ..."></p> <p><img src="https://pic1.zhimg.com/v2-5b3a5b12765ab248f5361d4a9b927276_1440w.jpg" 
alt="手撕LLM-Speculative Decoding】大模型迈向"并行"解码时代- 知乎"></p> <p><img src="http://www.linsight.cn/f5c015c/acce_alog.png" alt="大模型推理加速-投机解码| Linsight"></p> <p><img src="https://pic4.zhimg.com/v2-20b398df5cb95ad0f813fafd34eaa205_1440w.jpg" alt="LLM之Speculative Decoding实战- 知乎"></p> <p><img src="https://api.ibos.cn/v4/weapparticle/accesswximg?aid=85256&url=aHR0cHM6Ly9tbWJpei5xcGljLmNuL3N6X21tYml6X3BuZy96aFZsd2o5NnRUaWF1cldaalo4cmxSQnFYUWxKMWxYV1I4UThwYW9CU0ZjSW40S21RNWJCMTJSQUwxZ2pvRXVSVmlha0tpY2xNQ2Y4QWJ5cWJYelFWRHVtUS82NDA/d3hfZm10PXBuZw==" alt="万字综述10+ 种LLM 投机采样推理加速方案- 53AI-AI知识库|企业AI知识库|大模型知识库|AIHub"></p> <p><img src="https://awps-assets.meituan.net/mit-x/blog-images-bundle-2017/1c9f3009.png" alt="纠删码存储系统中的投机性部分写技术- 美团技术团队"></p> <p><img src="http://neurowave.tech/images/img/img_deepseek-3.png" alt="8. DeepSeek-V3（V2）详读4（架构+ MTP） - Neurowave"></p> <p><img src="https://img2024.cnblogs.com/blog/1850883/202504/1850883-20250420183840259-2070650832.jpg" alt="探秘Transformer系列之（30）--- 投机解码- 罗西的思考- 博客园"></p> <p><img src="https://p3-xtjj-sign.byteimg.com/tos-cn-i-73owjymdk6/61203a61d8e04bc5acb91c300578cd60~tplv-73owjymdk6-jj-mark-v1:0:0:0:0:5o6Y6YeR5oqA5pyv56S-5Yy6IEAg6Zuy6Zeq5LiW55WM:q75.awebp?rk3s=f64ab15b&x-expires=1769610174&x-signature=7PyysJfopNzntO%2FbodTYAI8sNhs%3D" alt="使用推测解码提高LLM 推理速度使用尖端优化技术加速推理的实用指南​ 欢迎来到雲闪世界。大型语言模型非- 掘金"></p> <p><img src="https://image.woshipm.com/2025/06/10/c235ab7a-4569-11f0-ad57-00163e09d72f.png" alt="MiniCPM 4.0 技术报告：端侧速度的奔涌，是模型的自我Rag | 人人都是产品经理"></p> <p><img src="https://s2.51cto.com/oss/202508/04/123252e0978af302beb829208896a107bc1280.png" alt="2 万字总结：全面梳理大模型Inference 相关技术-AI.x-AIGC专属社区-51CTO.COM"></p> <p><img src="https://api.ibos.cn/v4/weapparticle/accesswximg?aid=85256&url=aHR0cHM6Ly9tbWJpei5xcGljLmNuL3N6X21tYml6X3BuZy96aFZsd2o5NnRUaWF1cldaalo4cmxSQnFYUWxKMWxYV1JoYVcwbk9JN2p4WVFZZWljb2NIOTlUQU1rRThvaWJzbnVSR0ZJcWszOG9GNWVvbkQ0QkFyem9YZy82NDA/d3hfZm10PXBuZw==" alt="万字综述10+ 种LLM 投机采样推理加速方案- 53AI-AI知识库|企业AI知识库|大模型知识库|AIHub"></p> <p><img 
src="https://i-blog.csdnimg.cn/direct/9a4f25a272f2410bafcae3d973e9b34d.png" alt="李宏毅GENERATIVE AI——第16讲（5/17下）——Speculative Decoding-CSDN博客"></p> <p><img src="https://www8.zhizaozhe.com/public/uploads/picture/20250501/4fe1f3a02e72eb6f0a79fc2b301a052e.png" alt="企业级大模型推理和部署平台2025 - 工业大数据"></p> <p><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb97a8ac7-db97-497f-866d-10400729d51e_1248x764.png" alt="New OpenAI —— DeepSeek-V3 与R1 的关键技术与认知| MLOasis"></p> <p><img src="https://chatdoc-arxiv.oss-us-west-1.aliyuncs.com/images/arxiv/2410.18351/two_page_thumbnail.jpeg?AWSAccessKeyId=LTAI5t6b2G8eTtEBczAMwjhc&Signature=%2BgpkYINOs2vG8fO7Cv4368AhEYs%3D&Expires=9223372038624028672" alt="AdaEDL: Early Draft Stopping for Speculative Decoding of Large Language Models via an Entropy-based Lower Bound on Token Acceptance Probability"></p> <p><img src="https://developer.qcloudimg.com/http-save/yehe-1424957/374288cdceee1286e4fa6811ee7f480e.png" alt="大语言模型推理加速技术综述：基于多硬件平台的系统性分析与性能评测，涵盖CPU、GPU、FPGA、ASIC和存算一体的全面解析-腾讯云开发者社区-腾讯云"></p> <p><img src="https://i0.wp.com/novita-blog.s3.ap-southeast-1.amazonaws.com/will-speculative-decoding-harm-llm-inference-accuracy-QQ_1724643566358.png?resize=1176%2C706&ssl=1" alt="推测性解码会造成危害吗LLM 推理准确性？ - Novita"></p> <p><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6a623a4-fdbc-4abf-883b-3c2679b4ad4d_1460x640.png" alt="New OpenAI —— DeepSeek-V3 与R1 的关键技术与认知| MLOasis"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/lantern-accelerating-visual-autoregressive-models-with-relaxed-speculative-decoding-1.png" alt="论文评述] LANTERN: Accelerating Visual Autoregressive Models with Relaxed Speculative Decoding"></p> <p><img 
src="https://cdn.10100.com/content/20251023/b59c12ab-4852-4e95-bdf5-374bfcafaa47.png?x-oss-process=style/wmlogo" alt="vLLM 核心机密（四）：vLLM 进阶特性深度解析- 大数跨境"></p> <p><img src="https://substackcdn.com/image/fetch/w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facc49abf-bc55-45fd-9697-99c9434087d0_864x916.png" alt="New OpenAI —— DeepSeek-V3 与R1 的关键技术与认知| MLOasis"></p> <p><img src="https://jamchang.com/notes/speculative-decoding.webp" alt="Jam Notes - Speculative Decoding"></p> <p><img src="https://image.woshipm.com/2025/06/10/c3120200-4569-11f0-ad57-00163e09d72f.png" alt="MiniCPM 4.0 技术报告：端侧速度的奔涌，是模型的自我Rag | 人人都是产品经理"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/fast-dllm-training-free-acceleration-of-diffusion-llm-by-enabling-kv-cache-and-parallel-decoding-1.png" alt="论文评述] Fast-dLLM: Training-free Acceleration of Diffusion LLM by Enabling KV Cache and Parallel Decoding"></p> <p><img src="https://blog.vllm.ai/assets/figures/spec-decode/figure6.png" alt="How Speculative Decoding Boosts vLLM Performance by up to 2.8x | vLLM Blog"></p> <p><img src="https://img2024.cnblogs.com/blog/1850883/202504/1850883-20250420184310910-434932906.jpg" alt="探秘Transformer系列之（30）--- 投机解码- 罗西的思考- 博客园"></p> <p><img src="https://p3-volc-community-sign.byteimg.com/tos-cn-i-tlddhu82om/778300020f384d1b8ad280e31e85dc87~tplv-tlddhu82om-image.image?=&rk3s=8031ce6d&x-expires=1769413112&x-signature=MIAVGqhgei0DFk9Wd%2BG0b2QfiRE%3D" alt="LLM（十二）| DeepSeek-V3 技术报告深度解读——开源模型的巅峰之作- 文章- 开发者社区- 火山引擎"></p> <p><img src="x-raw-image:///ee909920d1db0f6a6197aad4ec964797dfd13842fa54777e2037273a24fd3838" alt="计算加速套件TACO Kit TACO LLM 推理加速引擎"></p> <p><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/layerskip-assets/Llama-2-13B.png" alt="LayerSkip：使用自推测解码加速大模型推理"></p>]]></content:encoded>
</item><item>
<title>Speculative decoding (93) 사진</title>
<link>https://servistopauto.ru/speculative-rag/3178-Speculative-decoding-93-sajin.html</link>
<pdalink>https://servistopauto.ru/speculative-rag/3178-Speculative-decoding-93-sajin.html</pdalink>
<guid>3178</guid>
<pubDate>Thu, 19 Feb 2026 20:32:27 +0300</pubDate>
<category>native-yes</category>

<enclosure url="https://substackcdn.com/image/fetch/$s_!pcG-!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f2f57bb-ed18-40b1-b910-dcc2f4f94c7e_2679x1005.png" type="image/png" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/improving-multi-candidate-speculative-decoding-2.png" type="image/png" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/polybasic-speculative-decoding-through-a-theoretical-perspective-2.png" type="image/png" />
<enclosure url="https://i.ytimg.com/vi/Qh9cIEelCj4/maxresdefault.jpg" type="image/jpeg" />
<enclosure url="https://newsroom.intel.com/wp-content/uploads/2025/07/speculative-decoding-algorithm-03.jpg" type="image/jpeg" />
<enclosure url="https://rocm.blogs.amd.com/_images/withdraftmodel.png" type="image/png" />
<enclosure url="https://docs-legacy.sambanova.ai/sambastudio/latest/_images/spec-decoding-playground-export-24-11-1.png" type="image/png" />
<enclosure url="https://developer-blogs.nvidia.com/wp-content/uploads/2025/09/speculative-decoding-generation-with-without.gif" type="image/gif" />
<enclosure url="https://miro.medium.com/v2/resize:fit:1400/1*hApzZiGSrRQULLaXK7PVLg.jpeg" type="image/jpeg" />
<enclosure url="https://lmsys.org/images/blog/spec_forge/offline_online.jpg" type="image/jpeg" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/specextend-a-drop-in-enhancement-for-speculative-decoding-of-long-sequences-1.png" type="image/png" />
<enclosure url="https://www.infocusp.com/blogs/speculative-decoding/images/speculative-decoding.webp" type="image/webp" />
<enclosure url="https://i.ytimg.com/vi/S-8yr_RibJ4/maxresdefault.jpg" type="image/jpeg" />
<enclosure url="https://developer-blogs.nvidia.com/wp-content/uploads/2025/09/speculative-decoding-verification-phase-target-model.gif" type="image/gif" />
<enclosure url="https://blog.vllm.ai/assets/figures/spec-decode/figure8.png" type="image/png" />
<enclosure url="https://miro.medium.com/v2/resize:fit:1400/1*cKEACKReWvmo3KL3giHcRA.png" type="image/png" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/pearl-parallel-speculative-decoding-with-adaptive-draft-length-1.png" type="image/png" />
<enclosure url="https://developer-blogs.nvidia.com/wp-content/uploads/2024/12/speculative-decoding-workflow-1024x885.jpg" type="image/jpeg" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!6DkQ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F708a883b-1188-4829-9ab4-079a1ecd8450_970x1610.png" type="image/png" />
<enclosure url="https://www.cs.cmu.edu/~csd-phd-blog/2025/suffix-decoding/fig0.webp" type="image/webp" />
<enclosure url="https://philkrav.com/spec-dist.png" type="image/png" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!xssb!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2a6e4bb-3829-4695-8216-cde0937ce3f3_1215x662.png" type="image/png" />
<enclosure url="https://miro.medium.com/v2/resize:fit:1400/1*eux4zLCCN_3_T6z1x8Fbqg.jpeg" type="image/jpeg" />
<enclosure url="https://miro.medium.com/v2/resize:fit:1400/1*QQdHHW85TllZZ96ur0GTxg.png" type="image/png" />
<enclosure url="https://docs-legacy.sambanova.ai/sambastudio/latest/_images/sd-pair-endpoint-24-11-1.png" type="image/png" />
<enclosure url="https://charlesxu.io/assets/images/speculative-decoding/medusa.png" type="image/png" />
<enclosure url="https://blog.vllm.ai/assets/figures/spec-decode/figure10.png" type="image/png" />
<enclosure url="https://ma.slideslive.com/library/presentations/39027731/thumbnail/specexec-massively-parallel-speculative-decoding-for-interactive-llm-inference-on-consumer-devices_Mt8PYH_big.png" type="image/png" />
<enclosure url="https://blog.vllm.ai/assets/figures/spec-decode/figure9.png" type="image/png" />
<enclosure url="https://bentoml.com/llm/assets/images/tp-1-spec-decoding-dc775d27386402a221febdaf8997e861.png" type="image/png" />
<enclosure url="https://rocm.blogs.amd.com/_images/InferenceLatencySpeedup70B.png" type="image/png" />
<enclosure url="https://cdn.prod.website-files.com/67d1b10ea1804fdfad7d7a65/67d1b10ea1804fdfad7d7c1e_cache-c5451304ba262c2a1fcef810ee141b64.webp" type="image/webp" />
<enclosure url="https://research-website-prod-cms-uploads.s3.us.cloud-object-storage.appdomain.cloud/Speculativedecoding_Final_ee8ae68115.png" type="image/png" />
<enclosure url="https://rocm.blogs.amd.com/_images/spd_concept3.png" type="image/png" />
<enclosure url="https://towardsdatascience.com/wp-content/uploads/2024/12/1sYU-r355eE8LL8ug8tngmQ.png" type="image/png" />
<enclosure url="https://www.marktechpost.com/wp-content/uploads/2024/04/Screenshot-2024-04-20-at-1.37.08-PM.png" type="image/png" />
<enclosure url="https://lmstudio.ai/assets/docs/speculative-decoding-setting.png" type="image/png" />
<enclosure url="https://company.hpc-ai.com/hs-fs/hubfs/image%20(5)-3.png?width=1262&height=668&name=image%20(5)-3.png" type="image/png" />
<enclosure url="https://pbs.twimg.com/media/G-_JcEPWoAAVyMM.jpg" type="image/jpeg" />
<enclosure url="https://developer-blogs.nvidia.com/wp-content/uploads/2025/09/speculative-decoding-eagle-drafting-mechanism.gif" type="image/gif" />
<enclosure url="https://www.cs.cmu.edu/~csd-phd-blog/2025/suffix-decoding/fig1.png" type="image/png" />
<enclosure url="https://scale-ml.org/posts/images/speculative_decoding/img2.png" type="image/png" />
<enclosure url="https://docs-legacy.sambanova.ai/sambastudio/latest/_images/spec-decoding-select-model-24-11-1.png" type="image/png" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!ckwS!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b48e1ee-25eb-4cd6-9946-1b66c432b0dc_1351x1150.png" type="image/png" />
<enclosure url="https://jamchang.com/notes/speculative-decoding.webp" type="image/webp" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!Zk5D!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2067c97f-4d7a-4e49-ad16-6b0f23828360_1359x1869.png" type="image/png" />
<enclosure url="https://objectstore.e2enetworks.net/e2eblog/jl/2025/blogs/speculative-decoding-with-vllm/speculative-decoding-vllm-cover.webp" type="image/webp" />
<enclosure url="https://multpletokensprediction.github.io/multipletokensprediction.github.io/static/images/fina_1.png" type="image/png" />
<enclosure url="https://www.marktechpost.com/wp-content/uploads/2023/09/Screenshot-2023-09-21-at-10.50.50-PM.png" type="image/png" />
<enclosure url="http://www.hanneshapke.com/images/speculative_decoding/speculative_decoding_comparison.png" type="image/png" />
<enclosure url="https://bentoml.com/llm/assets/images/spec-decoding-c8daf3401db4d98f369b95e6e725165c.png" type="image/png" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!wXSz!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86d97024-948b-4018-a7d9-a091bd2ba9a6_4230x1833.png" type="image/png" />
<enclosure url="https://storage.googleapis.com/gweb-research2023-media/original_images/SpecCascades-1-TradeOffs.png" type="image/png" />
<enclosure url="https://arxiv.org/html/2408.11850v1/x1.png" type="image/png" />
<enclosure url="https://blog.vllm.ai/assets/figures/spec-decode/figure2.png" type="image/png" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!aTD5!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7fb4cef4-a491-485e-9657-e8b54667c406_2552x1092.png" type="image/png" />
<enclosure url="https://pic4.zhimg.com/v2-cd8e80c2db1d2391bb0af68675de2749_1440w.jpg" type="image/jpeg" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/poss-position-specialist-generates-better-draft-for-speculative-decoding-2.png" type="image/png" />
<enclosure url="https://i.ytimg.com/vi/kw3ki7HqW4I/maxresdefault.jpg" type="image/jpeg" />
<enclosure url="https://pic4.zhimg.com/v2-20b398df5cb95ad0f813fafd34eaa205_1440w.jpg" type="image/jpeg" />
<enclosure url="https://scale-ml.org/posts/images/speculative_decoding/img1.png" type="image/png" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/mirror-speculative-decoding-breaking-the-serial-barrier-in-llm-inference-2.png" type="image/png" />
<enclosure url="https://paper-assets.alphaxiv.org/figures/2506.19830v1/LookaheadReasoningStep.jpg" type="image/jpeg" />
<enclosure url="https://picx.zhimg.com/v2-ea17477fef43d3257c250bf202ab9531_1440w.jpg" type="image/jpeg" />
<enclosure url="https://blog.vllm.ai/assets/figures/spec-decode/figure1.png" type="image/png" />
<enclosure url="https://veryunknown.com/post/speculative-sampling/speculative-sampling-probabilities.png" type="image/png" />
<enclosure url="https://charlesxu.io/assets/images/speculative-decoding/cover.png" type="image/png" />
<enclosure url="https://clova.ai/cdn/media/2025/08/231.png" type="image/png" />
<enclosure url="https://lmsys.org/images/blog/spec_forge/eagleintro.PNG" type="image/png" />
<enclosure url="https://velog.velcdn.com/images/2mini/post/add60b28-2be1-4ae8-a0dd-a26f3989ec39/image.png" type="image/png" />
<enclosure url="https://miro.medium.com/v2/resize:fit:1400/1*8XyRP1OkwC1tMO0_l6WEvw.png" type="image/png" />
<enclosure url="https://miro.medium.com/v2/resize:fit:1400/1*2Riz-Zl0U9TgDjKyyVn_Dw.png" type="image/png" />
<enclosure url="https://objectstore.e2enetworks.net/e2eblog/jl/2025/blogs/speculative-decoding-with-vllm/speculative-decoding-draft-model-workflow.webp" type="image/webp" />
<enclosure url="https://objectstore.e2enetworks.net/e2eblog/jl/2025/blogs/speculative-decoding-with-vllm/eagle-1-2-3-speculative-decoding-comparison.webp" type="image/webp" />
<content:encoded><![CDATA[<p><img src="https://substackcdn.com/image/fetch/$s_!pcG-!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f2f57bb-ed18-40b1-b910-dcc2f4f94c7e_2679x1005.png" alt="Generation at the Speed of Thought: Speculative Decoding"></p> <p><img src="https://neurips.cc/media/PosterPDFs/NeurIPS%202025/115453.png?t=1762494056.9963334" alt="SuffixDecoding: Extreme Speculative Decoding for Emerging AI Applications | Gabriele Oliaro"></p> <p><img src="x-raw-image:///b274a2c43d07fb0c96c2056a340f197326fb73d1fd2d945d9db79918b4fda8cb" alt="Graph-Structured Speculative Decoding"></p> <p><img src="https://raw.githubusercontent.com/omkaark/omkaark.github.io/refs/heads/main/public/7-spec-decode/draft-and-verify.png?raw=true" alt="Decisive guide on Speculative Decoding - Omkaar Kamath"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/improving-multi-candidate-speculative-decoding-2.png" alt="Literature Review] Improving Multi-candidate Speculative Decoding"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/polybasic-speculative-decoding-through-a-theoretical-perspective-2.png" alt="Literature Review] Polybasic Speculative Decoding Through a Theoretical Perspective"></p> <p><img src="https://i.ytimg.com/vi/Qh9cIEelCj4/maxresdefault.jpg" alt="Speculative Decoding: 3× Faster LLM Inference with Zero Quality Loss"></p> <p><img src="https://newsroom.intel.com/wp-content/uploads/2025/07/speculative-decoding-algorithm-03.jpg" alt="Intel and Weizmann Institute Speed AI with Speculative Decoding Advance - Intel Newsroom"></p> <p><img src="https://rocm.blogs.amd.com/_images/withdraftmodel.png" alt="Speculative Decoding - Deep Dive — ROCm Blogs"></p> <p><img 
src="https://media.licdn.com/dms/image/v2/D5612AQEXlGkcvw4NLQ/article-cover_image-shrink_720_1280/B56ZiNwaknG0AI-/0/1754724947698?e=2147483647&v=beta&t=tJ8zA2QrsGSvmkARnmMfB-4X3xgQb1n0ccxE6CLX6dg" alt="🚀 Speculative Decoding: Making LLMs Think Faster Without Losing Accuracy"></p> <p><img src="https://docs-legacy.sambanova.ai/sambastudio/latest/_images/spec-decoding-playground-export-24-11-1.png" alt="Speculative decoding :: SambaNova Documentation"></p> <p><img src="https://neurips.cc/media/PosterPDFs/NeurIPS%202024/93418.png?t=1733792005.6765432" alt="NeurIPS Poster Sequoia: Scalable and Robust Speculative Decoding"></p> <p><img src="https://developer-blogs.nvidia.com/wp-content/uploads/2025/09/speculative-decoding-generation-with-without.gif" alt="An Introduction to Speculative Decoding for Reducing Latency in AI Inference | NVIDIA Technical Blog"></p> <p><img src="https://miro.medium.com/v2/resize:fit:1400/1*hApzZiGSrRQULLaXK7PVLg.jpeg" alt="Boosting LLM Inference Speed Using Speculative Decoding | by Het Trivedi | TDS Archive | Medium"></p> <p><img src="https://lmsys.org/images/blog/spec_forge/offline_online.jpg" alt="SpecForge: Accelerating Speculative Decoding Training for SGLang | LMSYS Org"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/specextend-a-drop-in-enhancement-for-speculative-decoding-of-long-sequences-1.png" alt="Literature Review] SpecExtend: A Drop-in Enhancement for Speculative Decoding of Long Sequences"></p> <p><img src="https://lh7-us.googleusercontent.com/fNgMN4fgY_0S1ayBE30bsYtKf_Ogex0RnWcVuH_zOOo8VqICS5pZNalBPNeavm18ccxPY8HsVA_BlX2rMx4IoeJoYOoMMK4jqnqUKmTNErXYTjFZc-pDC0v0YaNVKEduUvRAy72gaw9jdABlI3kgWg0" alt="This AI Paper Unveils the Potential of Speculative Decoding for Faster Large Language Model Inference: A Comprehensive Analysis - MarkTechPost"></p> <p><img src="https://www.infocusp.com/blogs/speculative-decoding/images/speculative-decoding.webp" alt="Blogs - Speculative Decoding"></p> 
<p><img src="https://i.ytimg.com/vi/S-8yr_RibJ4/maxresdefault.jpg" alt="Speculative Decoding: When Two LLMs are Faster than One"></p> <p><img src="https://media.licdn.com/dms/image/v2/D4E12AQHfBHzkRE3JWQ/article-cover_image-shrink_720_1280/B4EZqERWaAKYAI-/0/1763155732674?e=2147483647&v=beta&t=6p2l-K8XfcSPKHu4L-_TdHiVWMmGLs8q43DGjG5Tsrc" alt="Efficiently Serving LLMs (Part 3): How Speculative Decoding Boosts Decode Speed"></p> <p><img src="https://developer-blogs.nvidia.com/wp-content/uploads/2025/09/speculative-decoding-verification-phase-target-model.gif" alt="An Introduction to Speculative Decoding for Reducing Latency in AI Inference | NVIDIA Technical Blog"></p> <p><img src="https://blog.vllm.ai/assets/figures/spec-decode/figure8.png" alt="How Speculative Decoding Boosts vLLM Performance by up to 2.8x | vLLM Blog"></p> <p><img src="https://miro.medium.com/v2/resize:fit:1400/1*cKEACKReWvmo3KL3giHcRA.png" alt="Speculative Decoding — Make LLM Inference Faster | Medium | AI Science"></p> <p><img src="https://camo.githubusercontent.com/b03abafad4d5bbc3c6aa148ec7ac910c3921c983163f1a2a88e6d3d637489c9b/68747470733a2f2f6769746875622e636f6d2f757365722d6174746163686d656e74732f6173736574732f36396635633039362d616263612d346639372d393532622d323931633532656233343434" alt="openvino_notebooks/notebooks/speculative-sampling/speculative-sampling.ipynb at latest · openvinotoolkit/openvino_notebooks · GitHub"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/pearl-parallel-speculative-decoding-with-adaptive-draft-length-1.png" alt="Literature Review] PEARL: Parallel Speculative Decoding with Adaptive Draft Length"></p> <p><img src="https://i0.wp.com/novita-blog.s3.ap-southeast-1.amazonaws.com/will-speculative-decoding-harm-llm-inference-accuracy-QQ_1724643834718.png?resize=1278%2C1838&ssl=1" alt="Will Speculative Decoding Harm LLM Inference Accuracy? 
- Novita"></p> <p><img src="https://developer-blogs.nvidia.com/wp-content/uploads/2024/12/speculative-decoding-workflow-1024x885.jpg" alt="TensorRT-LLM Speculative Decoding Boosts Inference Throughput by up to 3.6x | NVIDIA Technical Blog"></p> <p><img src="https://substackcdn.com/image/fetch/$s_!6DkQ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F708a883b-1188-4829-9ab4-079a1ecd8450_970x1610.png" alt="Speculative Decoding for LLM - by Bugra Akyildiz"></p> <p><img src="https://www.cs.cmu.edu/~csd-phd-blog/2025/suffix-decoding/fig0.webp" alt="CMU CSD PhD Blog - SuffixDecoding: Extreme Speculative Decoding for Emerging AI Applications"></p> <p><img src="https://philkrav.com/spec-dist.png" alt="Speculative Decoding - philkrav"></p> <p><img src="https://substackcdn.com/image/fetch/$s_!xssb!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2a6e4bb-3829-4695-8216-cde0937ce3f3_1215x662.png" alt="A Survey of Speculative Decoding Techniques in LLM Inference"></p> <p><img src="https://miro.medium.com/v2/resize:fit:1400/1*eux4zLCCN_3_T6z1x8Fbqg.jpeg" alt="Boosting LLM Inference Speed Using Speculative Decoding | by Het Trivedi | TDS Archive | Medium"></p> <p><img src="https://miro.medium.com/v2/resize:fit:1400/1*QQdHHW85TllZZ96ur0GTxg.png" alt="Unlocking Efficiency: Speculative Decoding with Transferable Vocabulary Tool | by Minyang Chen | Medium"></p> <p><img src="https://preview.redd.it/this-is-how-speculative-decoding-speeds-the-model-up-v0-prpbobebf07e1.png?width=1536&format=png&auto=webp&s=5ae0a47ebe93cfcb1f687d24bba65cfce706a660" alt="This is How Speculative Decoding Speeds the Model up : r/LocalLLaMA"></p> <p><img src="https://docs-legacy.sambanova.ai/sambastudio/latest/_images/sd-pair-endpoint-24-11-1.png" alt="Speculative decoding :: SambaNova Documentation"></p> <p><img 
src="https://charlesxu.io/assets/images/speculative-decoding/medusa.png" alt="Accelerate LLM Inference with Speculative Decoding | Charles Xu"></p> <p><img src="https://media.licdn.com/dms/image/v2/D4D12AQFP6D3pB3n4_w/article-cover_image-shrink_720_1280/B4DZc.4lcxHAAM-/0/1749106710836?e=2147483647&v=beta&t=1L4rEevVLVPAmU8Y2Te3VHwdAF-8BblHc_bbF8y1Rbo" alt="All You Need to Know About Speculative Decoding"></p> <p><img src="https://blog.vllm.ai/assets/figures/spec-decode/figure10.png" alt="How Speculative Decoding Boosts vLLM Performance by up to 2.8x | vLLM Blog"></p> <p><img src="https://ma.slideslive.com/library/presentations/39027731/thumbnail/specexec-massively-parallel-speculative-decoding-for-interactive-llm-inference-on-consumer-devices_Mt8PYH_big.png" alt="SpecExec: Massively Parallel Speculative Decoding For Interactive LLM Inference on Consumer Devices"></p> <p><img src="https://blog.vllm.ai/assets/figures/spec-decode/figure9.png" alt="How Speculative Decoding Boosts vLLM Performance by up to 2.8x | vLLM Blog"></p> <p><img src="https://bentoml.com/llm/assets/images/tp-1-spec-decoding-dc775d27386402a221febdaf8997e861.png" alt="Speculative decoding | LLM Inference Handbook"></p> <p><img src="https://rocm.blogs.amd.com/_images/InferenceLatencySpeedup70B.png" alt="Speculative Decoding - Deep Dive — ROCm Blogs"></p> <p><img src="https://clova.ai/_next/image?url=%2Fcdn%2Fmedia%2F2025%2F08%2Ftechblog_E18490E185A6E1848FE185B3E18487E185B3E186AFE18485E185A9E18480E185B3E18492E185A9E186B7_1280X800-2.png&w=3840&q=75" alt="Speculative Decoding - Tag | CLOVA"></p> <p><img src="https://cdn.prod.website-files.com/67d1b10ea1804fdfad7d7a65/67d1b10ea1804fdfad7d7c1e_cache-c5451304ba262c2a1fcef810ee141b64.webp" alt="Doubleword | In the fast lane! 
Speculative decoding - 10x larger model, no extra cost"></p> <p><img src="https://research-website-prod-cms-uploads.s3.us.cloud-object-storage.appdomain.cloud/Speculativedecoding_Final_ee8ae68115.png" alt="Speculative decoding: cost-effective AI inferencing - IBM Research"></p> <p><img src="https://rocm.blogs.amd.com/_images/spd_concept3.png" alt="Accelerating LLM Inference: Up to 3x Speedup on MI300X with Speculative Decoding — ROCm Blogs"></p> <p><img src="https://towardsdatascience.com/wp-content/uploads/2024/12/1sYU-r355eE8LL8ug8tngmQ.png" alt="Combining Large and Small LLMs to Boost Inference Time and Quality | Towards Data Science"></p> <p><img src="https://www.marktechpost.com/wp-content/uploads/2024/04/Screenshot-2024-04-20-at-1.37.08-PM.png" alt="Researchers at CMU Introduce TriForce: A Hierarchical Speculative Decoding AI System that is Scalable to Long Sequence Generation - MarkTechPost"></p> <p><img src="https://lmstudio.ai/assets/docs/speculative-decoding-setting.png" alt="Speculative Decoding | LM Studio Docs"></p> <p><img src="https://company.hpc-ai.com/hs-fs/hubfs/image%20(5)-3.png?width=1262&height=668&name=image%20(5)-3.png" alt="SGLang Speculative Decoding Tutorial: How to Deploy DeepSeek Models and Achieve 1.4× Throughput – With Benchmarks"></p> <p><img src="https://pbs.twimg.com/media/G-_JcEPWoAAVyMM.jpg" alt="Speculative Decoding but with Discrete Diffusion?! 
This paper SpecDiff-2 replaces the autoregressive drafter in speculative decoding with a discrete diffusion model that drafts whole token blocks in parallel in a few denoising"></p> <p><img src="https://developer-blogs.nvidia.com/wp-content/uploads/2025/09/speculative-decoding-eagle-drafting-mechanism.gif" alt="An Introduction to Speculative Decoding for Reducing Latency in AI Inference | NVIDIA Technical Blog"></p> <p><img src="https://www.cs.cmu.edu/~csd-phd-blog/2025/suffix-decoding/fig1.png" alt="CMU CSD PhD Blog - SuffixDecoding: Extreme Speculative Decoding for Emerging AI Applications"></p> <p><img src="https://scale-ml.org/posts/images/speculative_decoding/img2.png" alt="Speculative decoding |"></p> <p><img src="https://docs-legacy.sambanova.ai/sambastudio/latest/_images/spec-decoding-select-model-24-11-1.png" alt="Speculative decoding :: SambaNova Documentation"></p> <p><img src="https://substackcdn.com/image/fetch/$s_!ckwS!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b48e1ee-25eb-4cd6-9946-1b66c432b0dc_1351x1150.png" alt="A Survey of Speculative Decoding Techniques in LLM Inference"></p> <p><img src="https://jamchang.com/notes/speculative-decoding.webp" alt="Jam Notes - Speculative Decoding"></p> <p><img src="https://substackcdn.com/image/fetch/$s_!Zk5D!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2067c97f-4d7a-4e49-ad16-6b0f23828360_1359x1869.png" alt="Speculative Decoding: Unlocking Faster Inference in Transformers"></p> <p><img src="https://objectstore.e2enetworks.net/e2eblog/jl/2025/blogs/speculative-decoding-with-vllm/speculative-decoding-vllm-cover.webp" alt="Speculative Decoding in vLLM: Complete Guide to Faster LLM Inference | Jarvislabs.ai Docs"></p> <p><img src="https://multpletokensprediction.github.io/multipletokensprediction.github.io/static/images/fina_1.png" alt="Accelerating Codec-based Speech Synthesis 
with Multi-Token Prediction and Speculative Decoding"></p> <p><img src="https://www.marktechpost.com/wp-content/uploads/2023/09/Screenshot-2023-09-21-at-10.50.50-PM.png" alt="Researchers from UCI and Zhejiang University Introduce Lossless Large Language Model Acceleration via Self-Speculative Decoding Using Drafting And Verifying Stages - MarkTechPost"></p> <p><img src="http://www.hanneshapke.com/images/speculative_decoding/speculative_decoding_comparison.png" alt="Speculative Decoding with vLLM"></p> <p><img src="https://bentoml.com/llm/assets/images/spec-decoding-c8daf3401db4d98f369b95e6e725165c.png" alt="Speculative decoding | LLM Inference Handbook"></p> <p><img src="https://substackcdn.com/image/fetch/$s_!wXSz!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86d97024-948b-4018-a7d9-a091bd2ba9a6_4230x1833.png" alt="Speculative Decoding for LLM - by Bugra Akyildiz"></p> <p><img src="https://storage.googleapis.com/gweb-research2023-media/original_images/SpecCascades-1-TradeOffs.png" alt="Speculative cascades — A hybrid approach for smarter, faster LLM inference"></p> <p><img src="https://arxiv.org/html/2408.11850v1/x1.png" alt="Parallel Speculative Decoding with Adaptive Draft Length | AI Research Paper Details"></p> <p><img src="https://i0.wp.com/novita-blog.s3.ap-southeast-1.amazonaws.com/will-speculative-decoding-harm-llm-inference-accuracy-QQ_1724643566358.png?resize=1176%2C706&ssl=1" alt="Will Speculative Decoding Harm LLM Inference Accuracy? 
- Novita"></p> <p><img src="https://blog.vllm.ai/assets/figures/spec-decode/figure2.png" alt="How Speculative Decoding Boosts vLLM Performance by up to 2.8x | vLLM Blog"></p> <p><img src="https://substackcdn.com/image/fetch/$s_!aTD5!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7fb4cef4-a491-485e-9657-e8b54667c406_2552x1092.png" alt="Speculative Decoding for LLM - by Bugra Akyildiz"></p> <p><img src="https://pic4.zhimg.com/v2-cd8e80c2db1d2391bb0af68675de2749_1440w.jpg" alt="EAGLE: Speculative Sampling Requires Rethinking Feature Uncertainty - 知乎"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/poss-position-specialist-generates-better-draft-for-speculative-decoding-2.png" alt="Literature Review] POSS: Position Specialist Generates Better Draft for Speculative Decoding"></p> <p><img src="https://i.ytimg.com/vi/kw3ki7HqW4I/maxresdefault.jpg" alt="Speculative Decoding Explained"></p> <p><img src="https://pic4.zhimg.com/v2-20b398df5cb95ad0f813fafd34eaa205_1440w.jpg" alt="LLM之Speculative Decoding实战- 知乎"></p> <p><img src="https://image.inblog.dev/?url=https%3A%2F%2Fwww.notion.so%2Fimage%2Fhttps%253A%252F%252Fprod-files-secure.s3.us-west-2.amazonaws.com%252F23f4b38d-2def-440d-b962-b485f3d7fb97%252F74d4c429-a6f6-4f92-bb63-b737ff98847b%252Fspec_dec.png%253FspaceId%253D23f4b38d-2def-440d-b962-b485f3d7fb97%3Ftable%3Dblock%26id%3D157258ac-0943-80b4-987d-c4e04383f6a1%26cache%3Dv2&w=1920&q=75" alt="vLLM vs TensorRT-LLM] #11. 
Speculative Decoding - The official SqueezeBits Tech blog"></p> <p><img src="https://framerusercontent.com/images/j4dlcpc1aV0lSMGXrOSAobi7DSs.png?width=3840&height=2582" alt="Accelerating Sonar Through Speculation"></p> <p><img src="https://icml.cc/media/PosterPDFs/ICML%202025/43675.png?t=1751562839.2291448" alt="ICML Poster Accelerating LLM Inference with Lossless Speculative Decoding Algorithms for Heterogeneous Vocabularies"></p> <p><img src="https://scale-ml.org/posts/images/speculative_decoding/img1.png" alt="Speculative decoding |"></p> <p><img src="https://lookaside.instagram.com/seo/google_widget/crawler/?media_id=3788095496121434790" alt="Faster AI without cutting corners. Speculative decoding is redefining how large language models generate text, combining speed, precision, and scalability. No architecture change. No accuracy loss. Just smarter inference. [LLMs, AIOptimization ..."></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/mirror-speculative-decoding-breaking-the-serial-barrier-in-llm-inference-2.png" alt="Literature Review] Mirror Speculative Decoding: Breaking the Serial Barrier in LLM Inference"></p> <p><img src="https://paper-assets.alphaxiv.org/figures/2506.19830v1/LookaheadReasoningStep.jpg" alt="Scaling Speculative Decoding with Lookahead Reasoning | alphaXiv"></p> <p><img src="https://picx.zhimg.com/v2-ea17477fef43d3257c250bf202ab9531_1440w.jpg" alt="Speculative Decoding 论文阅读合订本- 知乎"></p> <p><img src="https://blog.vllm.ai/assets/figures/spec-decode/figure1.png" alt="How Speculative Decoding Boosts vLLM Performance by up to 2.8x | vLLM Blog"></p> <p><img src="https://veryunknown.com/post/speculative-sampling/speculative-sampling-probabilities.png" alt="Speculative Sampling Trick for Large Language Model Decoding - VeryUnknown"></p> <p><img src="https://charlesxu.io/assets/images/speculative-decoding/cover.png" alt="Accelerate LLM Inference with Speculative Decoding | Charles Xu"></p> <p><img 
src="https://lookaside.instagram.com/seo/google_widget/crawler/?media_id=3788095495601362200" alt="Faster AI without cutting corners. Speculative decoding is redefining how large language models generate text, combining speed, precision, and scalability. No architecture change. No accuracy loss. Just smarter inference. [LLMs, AIOptimization ..."></p> <p><img src="https://clova.ai/cdn/media/2025/08/231.png" alt="Breaking the speed barrier: How we implemented speculative decoding for HyperCLOVA X | CLOVA"></p> <p><img src="https://lmsys.org/images/blog/spec_forge/eagleintro.PNG" alt="SpecForge: Accelerating Speculative Decoding Training for SGLang | LMSYS Org"></p> <p><img src="https://velog.velcdn.com/images/2mini/post/add60b28-2be1-4ae8-a0dd-a26f3989ec39/image.png" alt="paper review] Unlocking Efficiency in Large Language Model Inference : A Comprehensive Survey of Speculative Decoding"></p> <p><img src="https://miro.medium.com/v2/resize:fit:1400/1*8XyRP1OkwC1tMO0_l6WEvw.png" alt="Speculative Decoding and Self-Speculative Decoding: Faster Approaches to Large Language Model Generation | by Isaac Kargar | Medium"></p> <p><img src="https://miro.medium.com/v2/resize:fit:1400/1*2Riz-Zl0U9TgDjKyyVn_Dw.png" alt="Speculative Decoding: Free Tokens Without Extra GPUs | by Hash Block | Medium"></p> <p><img src="https://objectstore.e2enetworks.net/e2eblog/jl/2025/blogs/speculative-decoding-with-vllm/speculative-decoding-draft-model-workflow.webp" alt="Speculative Decoding in vLLM: Complete Guide to Faster LLM Inference | Jarvislabs.ai Docs"></p> <p><img src="https://objectstore.e2enetworks.net/e2eblog/jl/2025/blogs/speculative-decoding-with-vllm/eagle-1-2-3-speculative-decoding-comparison.webp" alt="Speculative Decoding in vLLM: Complete Guide to Faster LLM Inference | Jarvislabs.ai Docs"></p>]]></content:encoded>
</item><item>
<title>Speculative diffusion decoding accelerating language generation through diffusion (97) 사진</title>
<link>https://servistopauto.ru/speculative-rag/3179-Speculative-diffusion-decoding-accelerating-language-generation-through-diffusion-97-sajin.html</link>
<pdalink>https://servistopauto.ru/speculative-rag/3179-Speculative-diffusion-decoding-accelerating-language-generation-through-diffusion-97-sajin.html</pdalink>
<guid>3179</guid>
<pubDate>Thu, 19 Feb 2026 20:32:27 +0300</pubDate>
<category>native-yes</category>

<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/diffusion-language-models-know-the-answer-before-decoding-3.png" type="image/png" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/latent-refinement-decoding-enhancing-diffusion-based-language-models-by-refining-belief-states-2.png" type="image/png" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/flash-latent-aware-semi-autoregressive-speculative-decoding-for-multimodal-tasks-3.png" type="image/png" />
<enclosure url="https://media.springernature.com/lw1200/springer-static/image/art%3A10.1007%2Fs10462-025-11423-3/MediaObjects/10462_2025_11423_Fig8_HTML.png" type="image/png" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!AT0_!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6abce6a4-6f56-4c99-b0f1-b9caa7bd0da6_2576x1532.png" type="image/png" />
<enclosure url="https://www.preprints.org/frontend/picture/ms_xml/manuscript/9467916cff347f1f274406f41e2e95b1/preprints-145866-g012.png" type="image/png" />
<enclosure url="https://miro.medium.com/v2/resize:fit:1400/1*9P082mFNCEjfsipPVFePVQ.png" type="image/png" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!6VnI!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5224d252-e30f-4d25-ac7a-ee07f856fc73_1700x2200.png" type="image/png" />
<enclosure url="https://cdn-uploads.huggingface.co/production/uploads/62a8fa984d933c74bf410c16/VDDCsutVgInVtJlvVV2WJ.png" type="image/png" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/polybasic-speculative-decoding-through-a-theoretical-perspective-2.png" type="image/png" />
<enclosure url="https://www.marktechpost.com/wp-content/uploads/2024/11/Screenshot-2024-11-13-at-7.58.11%E2%80%AFAM.png" type="image/png" />
<enclosure url="https://miro.medium.com/v2/resize:fit:1358/format:webp/1*6BUW0tq2OFQShZBWf3-Fog.png" type="image/png" />
<enclosure url="https://pbs.twimg.com/media/G5MqidCXcAAqmvc.jpg" type="image/jpeg" />
<enclosure url="https://i1.rgstatic.net/publication/368159584_Accelerating_Large_Language_Model_Decoding_with_Speculative_Sampling/links/63dc7cc462d2a24f92f0351a/largepreview.png" type="image/png" />
<enclosure url="https://lmsys.org/images/blog/dllm/preview.png" type="image/png" />
<enclosure url="https://neurips2024-enlsp.github.io/images/posters/1.png" type="image/png" />
<enclosure url="https://i1.rgstatic.net/publication/394262235_Diffusion-based_Large_Language_Models_Survey/links/68a33b261bee4d42a24082d6/largepreview.png" type="image/png" />
<enclosure url="https://miro.medium.com/v2/resize:fit:1400/1*8XyRP1OkwC1tMO0_l6WEvw.png" type="image/png" />
<enclosure url="https://objectstore.e2enetworks.net/e2eblog/jl/2025/blogs/speculative-decoding-with-vllm/speculative-decoding-vllm-cover.webp" type="image/webp" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!qRpY!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0d338fa1-4ac3-4c15-b1e6-67bbb2739967_2278x1290.png" type="image/png" />
<enclosure url="https://miro.medium.com/v2/resize:fit:1200/1*SbbcoMMTKwYITizveLAAvw.png" type="image/png" />
<enclosure url="https://d2jud02ci9yv69.cloudfront.net/2025-04-28-bridging-the-parallel-decoding-of-llms-with-the-diffusion-process-63/assets/img/2025-04-28-bridging-the-parallel-decoding-of-llms-with-the-diffusion-process/diffu_lm2.png" type="image/png" />
<enclosure url="https://cdn.bytez.com/mobilePapers/v2/icml/45820/images/3-0.png" type="image/png" />
<enclosure url="https://cf-images.us-east-1.prod.boltdns.net/v1/jit/1414329538001/c70b29e9-fe5e-4e24-9ee4-6487616232f4/main/1280x720/15s61ms/match/image.jpg" type="image/jpeg" />
<enclosure url="https://www.marktechpost.com/wp-content/uploads/2025/07/Screenshot-2025-07-16-at-3.59.05-PM.png" type="image/png" />
<enclosure url="https://pbs.twimg.com/media/Gtc0G_HXEAA1t4O.jpg" type="image/jpeg" />
<enclosure url="https://d2jud02ci9yv69.cloudfront.net/2025-04-28-bridging-the-parallel-decoding-of-llms-with-the-diffusion-process-63/assets/img/2025-04-28-bridging-the-parallel-decoding-of-llms-with-the-diffusion-process/jacobi-decoding.png" type="image/png" />
<enclosure url="https://quantumzeitgeist.com/wp-content/uploads/Image_fx-71-11.jpg" type="image/jpeg" />
<enclosure url="https://csdl-images.ieeecomputer.org/trans/tp/2025/09/figures/zhou9-3569700.gif" type="image/gif" />
<enclosure url="https://www.marktechpost.com/wp-content/uploads/2024/04/Screenshot-2024-04-20-at-1.37.08-PM.png" type="image/png" />
<enclosure url="https://www.marktechpost.com/wp-content/uploads/2024/09/Screenshot-2024-09-02-at-7.57.08-AM.png" type="image/png" />
<enclosure url="https://www.mdpi.com/electronics/electronics-14-02188/article_deploy/html/images/electronics-14-02188-g001.png" type="image/png" />
<enclosure url="https://csdl-images.ieeecomputer.org/trans/tp/2025/09/figures/zhou3-3569700.gif" type="image/gif" />
<enclosure url="https://miro.medium.com/v2/resize:fit:1400/1*Z696gMLDIYb2qNitKqjPQg.png" type="image/png" />
<enclosure url="https://developer-blogs.nvidia.com/wp-content/uploads/2024/12/speculative-decoding-workflow-1024x885.jpg" type="image/jpeg" />
<enclosure url="https://cdn.bytez.com/mobilePapers/v2/icml/45820/images/7-0.png" type="image/png" />
<enclosure url="https://csdl-images.ieeecomputer.org/trans/tp/2025/09/figures/zhou10-3569700.gif" type="image/gif" />
<enclosure url="https://lmsys.org/images/blog/laattention/acc-demo.gif" type="image/gif" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!Cx6I!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c7d1b1d-d482-4ce1-bd3b-f7399f67dadb_2026x1144.png" type="image/png" />
<enclosure url="https://miro.medium.com/v2/resize:fit:1358/format:webp/1*nL3_E5DHx6aLuFP2nxIvzw.png" type="image/png" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!449j!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7dcdd662-c04b-4d83-ac11-8c7840a45545_2482x1128.png" type="image/png" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/specdiff-accelerating-diffusion-model-inference-with-self-speculation-1.png" type="image/png" />
<enclosure url="https://www.qualcomm.com/content/dam/qcomm-martech/dm-assets/images/blog/ai-product/full-stack-ai-optimization-for-lvm.jpg" type="image/jpeg" />
<enclosure url="https://cdn-uploads.huggingface.co/production/uploads/65c2710dc79c1a6e4d22734d/kB7OTGVsC1DPMIV2femsf.png" type="image/png" />
<enclosure url="https://www.preprints.org/frontend/picture/ms_xml/manuscript/9467916cff347f1f274406f41e2e95b1/preprints-145866-g001.png" type="image/png" />
<enclosure url="https://arxiv.org/html/2503.09790v1/x1.png" type="image/png" />
<enclosure url="https://csdl-images.ieeecomputer.org/trans/tp/2025/09/figures/zhou14-3569700.gif" type="image/gif" />
<enclosure url="https://csdl-images.ieeecomputer.org/trans/tp/2025/09/figures/zhou8-3569700.gif" type="image/gif" />
<enclosure url="https://miro.medium.com/v2/resize:fit:1400/1*WJCYUER0wVTnnGCpDw7zAA.png" type="image/png" />
<enclosure url="https://substackcdn.com/image/fetch/$s_!AASa!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F897ee242-463d-4046-866d-c5785c568214_2318x1256.png" type="image/png" />
<enclosure url="https://csdl-images.ieeecomputer.org/trans/tp/2025/09/figures/zhou5-3569700.gif" type="image/gif" />
<enclosure url="https://cdn.bytez.com/mobilePapers/v2/icml/45820/images/12-0.png" type="image/png" />
<enclosure url="https://cdn-uploads.huggingface.co/production/uploads/630139f1f6bea7dd15bdaf4e/8S4ujHUYnDd6leasY69yj.png" type="image/png" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/diffusion-language-models-know-the-answer-before-decoding-4.png" type="image/png" />
<enclosure url="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/speculative-diffusion-decoding-accelerating-language-generation-through-diffusion-1.png" type="image/png" />
<enclosure url="https://i.ytimg.com/vi/t3M3T21yY-Q/maxresdefault.jpg" type="image/jpeg" />
<enclosure url="https://hao-ai-lab.github.io/img/objective_illustration_global.jpg" type="image/jpeg" />
<enclosure url="https://www.marktechpost.com/wp-content/uploads/2023/09/Screenshot-2023-09-21-at-10.50.50-PM.png" type="image/png" />
<content:encoded><![CDATA[<p><img src="https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=4330150567256663" alt="Loopholing Discrete Diffusion: Deterministic Bypass of the Sampling Wall (KAIST, October 2025) Paper: [https://arxiv.org/abs/2510.19304](https://arxiv.org/abs/2510.19304) Abstract: "Discrete diffusion models offer a promising alternative to ..."></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/diffusion-language-models-know-the-answer-before-decoding-3.png" alt="论文评述] Diffusion Language Models Know the Answer Before Decoding"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/latent-refinement-decoding-enhancing-diffusion-based-language-models-by-refining-belief-states-2.png" alt="论文评述] Latent Refinement Decoding: Enhancing Diffusion-Based Language Models by Refining Belief States"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/flash-latent-aware-semi-autoregressive-speculative-decoding-for-multimodal-tasks-3.png" alt="Literature Review] FLASH: Latent-Aware Semi-Autoregressive Speculative Decoding for Multimodal Tasks"></p> <p><img src="https://media.springernature.com/lw1200/springer-static/image/art%3A10.1007%2Fs10462-025-11423-3/MediaObjects/10462_2025_11423_Fig8_HTML.png" alt="Knowledge distillation and dataset distillation of large language models: emerging trends, challenges, and future directions | Artificial Intelligence Review"></p> <p><img src="https://substackcdn.com/image/fetch/$s_!AT0_!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6abce6a4-6f56-4c99-b0f1-b9caa7bd0da6_2576x1532.png" alt="🥇Top AI Papers of the Week - AI Newsletter"></p> <p><img src="https://camo.githubusercontent.com/530e0904add3b9665ad118921bcf019e09e20bcb9b15f640dd2f5a65f7ad9a86/68747470733a2f2f61727869762e6f72672f68746d6c2f323430362e303338353376312f78332e706e67" 
alt="Awesome-Efficient-LLM/inference_acceleration.md at main · horseee/Awesome-Efficient-LLM · GitHub"></p> <p><img src="https://www.preprints.org/frontend/picture/ms_xml/manuscript/9467916cff347f1f274406f41e2e95b1/preprints-145866-g012.png" alt="HeteroLLM: Accelerating Large Language Model Inference on Mobile SoCs with Heterogeneous AI Accelerators[v1] | Preprints.org"></p> <p><img src="https://miro.medium.com/v2/resize:fit:1400/1*9P082mFNCEjfsipPVFePVQ.png" alt="Important LLM Papers for the Week From 12/05 to 18/05 | by Youssef Hosni | Level Up Coding"></p> <p><img src="https://substackcdn.com/image/fetch/$s_!6VnI!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5224d252-e30f-4d25-ac7a-ee07f856fc73_1700x2200.png" alt="2024년 8월 14일 - by Kim Seonghyeon - arXiv Daily"></p> <p><img src="https://cdn-uploads.huggingface.co/production/uploads/62a8fa984d933c74bf410c16/VDDCsutVgInVtJlvVV2WJ.png" alt="Paper page - Set Block Decoding is a Language Model Inference Accelerator"></p> <p><img src="https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=3622947677976959" alt="Accelerating LLM inference with speculative decoding"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/polybasic-speculative-decoding-through-a-theoretical-perspective-2.png" alt="Literature Review] Polybasic Speculative Decoding Through a Theoretical Perspective"></p> <p><img src="https://imgv2-2-f.scribdassets.com/img/document/897547885/original/68bf8154b7/1?v=1" alt="Judge Decoding Faster Spe | PDF | Learning | Applied Mathematics"></p> <p><img src="https://www.marktechpost.com/wp-content/uploads/2024/11/Screenshot-2024-11-13-at-7.58.11%E2%80%AFAM.png" alt="Researchers from Snowflake and CMU Introduce SuffixDecoding: A Novel Model-Free Approach to Accelerating Large Language Model (LLM) Inference through Speculative Decoding - MarkTechPost"></p> <p><img 
src="https://miro.medium.com/v2/resize:fit:1358/format:webp/1*6BUW0tq2OFQShZBWf3-Fog.png" alt="Speculative Decoding and Self-Speculative Decoding: Faster Approaches to Large Language Model Generation | by Isaac Kargar | Medium"></p> <p><img src="https://pbs.twimg.com/media/G5MqidCXcAAqmvc.jpg" alt="Nando Fioretto (✈️ @NeurIPS) (@nandofioretto) / Posts / X"></p> <p><img src="https://i1.rgstatic.net/publication/368159584_Accelerating_Large_Language_Model_Decoding_with_Speculative_Sampling/links/63dc7cc462d2a24f92f0351a/largepreview.png" alt="PDF) Accelerating Large Language Model Decoding with Speculative Sampling"></p> <p><img src="https://lmsys.org/images/blog/dllm/preview.png" alt="Blog | LMSYS Org"></p> <p><img src="https://neurips2024-enlsp.github.io/images/posters/1.png" alt="ENLSP NeurIPS Workshop 2024 | ENLSP highlights some fundamental problems in NLP and speech processing related to efficiency of the models, training and inference for the general ML and DL communities."></p> <p><img src="https://i1.rgstatic.net/publication/394262235_Diffusion-based_Large_Language_Models_Survey/links/68a33b261bee4d42a24082d6/largepreview.png" alt="PDF) Diffusion-based Large Language Models Survey"></p> <p><img src="https://miro.medium.com/v2/resize:fit:1400/1*8XyRP1OkwC1tMO0_l6WEvw.png" alt="Speculative Decoding and Self-Speculative Decoding: Faster Approaches to Large Language Model Generation | by Isaac Kargar | Medium"></p> <p><img src="https://objectstore.e2enetworks.net/e2eblog/jl/2025/blogs/speculative-decoding-with-vllm/speculative-decoding-vllm-cover.webp" alt="Speculative Decoding in vLLM: Complete Guide to Faster LLM Inference | Jarvislabs.ai Docs"></p> <p><img src="x-raw-image:///45ffafb8200200afb395150dca798e381eb425a8c7ac269ffe9854e9b28db5b5" alt="Accelerated Diffusion Models via Speculative Sampling"></p> <p><img 
src="https://substackcdn.com/image/fetch/$s_!qRpY!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0d338fa1-4ac3-4c15-b1e6-67bbb2739967_2278x1290.png" alt="🥇Top AI Papers of the Week - AI Newsletter"></p> <p><img src="https://miro.medium.com/v2/resize:fit:1200/1*SbbcoMMTKwYITizveLAAvw.png" alt="Important LLM Papers for the Week From 10/11 To 16/11 | by Youssef Hosni | Dec, 2025 | Towards AI"></p> <p><img src="x-raw-image:///fe1f96219269272b3adc527460e3d4cc450d2c12381b75892aac78c147716cef" alt="Accelerating Diffusion LLMs via Adaptive Parallel Decoding"></p> <p><img src="https://d2jud02ci9yv69.cloudfront.net/2025-04-28-bridging-the-parallel-decoding-of-llms-with-the-diffusion-process-63/assets/img/2025-04-28-bridging-the-parallel-decoding-of-llms-with-the-diffusion-process/diffu_lm2.png" alt="Bridging the Parallel Decoding of LLMs with the Diffusion Process | ICLR Blogposts 2025"></p> <p><img src="https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=4003858303219226" alt="Accelerating LLM inference with speculative decoding"></p> <p><img src="https://cdn.bytez.com/mobilePapers/v2/icml/45820/images/3-0.png" alt="ICML Poster Morse: Dual-Sampling for Lossless Acceleration of Diffusion Models"></p> <p><img src="https://cf-images.us-east-1.prod.boltdns.net/v1/jit/1414329538001/c70b29e9-fe5e-4e24-9ee4-6487616232f4/main/1280x720/15s61ms/match/image.jpg" alt="Edge AI LLM | Efficient On-Device Language | Qualcomm"></p> <p><img src="x-raw-image:///c105852153667bc08f705cc41617a02081c9fb15149bab34c8d5a44353bb59c7" alt="Speculative Diffusion Decoding: Accelerating Language Generation through Diffusion"></p> <p><img src="https://www.marktechpost.com/wp-content/uploads/2025/07/Screenshot-2025-07-16-at-3.59.05-PM.png" alt="Apple Introduces DiffuCoder: A 7B Diffusion LLM Tailored for Code Generation - MarkTechPost"></p> <p><img 
src="https://camo.githubusercontent.com/670e62b1fce905308156b2592b0608ffb447c1637bd8a031c7c12f60a0e2fa1a/68747470733a2f2f61727869762e6f72672f68746d6c2f323430332e313836343776312f6578747261637465642f353439353630362f73616d706c655f73747275747572652e706e67" alt="Awesome-Efficient-LLM/inference_acceleration.md at main · horseee/Awesome-Efficient-LLM · GitHub"></p> <p><img src="x-raw-image:///fe3bf5fa35f26ce4d3423f0a5cc4f4b812112dcfb6a279633d874cccd53ef10c" alt="Accelerating Diffusion LLMs via Adaptive Parallel Decoding"></p> <p><img src="https://pbs.twimg.com/media/Gtc0G_HXEAA1t4O.jpg" alt="Nando Fioretto (✈️ @NeurIPS) (@nandofioretto) / Posts / X"></p> <p><img src="x-raw-image:///282031dd39e0dea0e99f8056714cb634c4297d5a71d781bc9397a468e2d65e06" alt="Accelerated Diffusion Models via Speculative Sampling"></p> <p><img src="x-raw-image:///60806b0f050d6afb0a88e180cd497ab64a56149d7f761bd53fb481d969f1fa7a" alt="Accelerating Diffusion LLMs via Adaptive Parallel Decoding"></p> <p><img src="x-raw-image:///22b491a7d7f194faa11f3718c6e25f8d40c5c3328198c4b8a84b3a4bd81c3199" alt="JUDGE DECODING: FASTER SPECULATIVE SAMPLING REQUIRES GOING BEYOND MODEL ALIGNMENT"></p> <p><img src="https://d2jud02ci9yv69.cloudfront.net/2025-04-28-bridging-the-parallel-decoding-of-llms-with-the-diffusion-process-63/assets/img/2025-04-28-bridging-the-parallel-decoding-of-llms-with-the-diffusion-process/jacobi-decoding.png" alt="Bridging the Parallel Decoding of LLMs with the Diffusion Process | ICLR Blogposts 2025"></p> <p><img src="https://quantumzeitgeist.com/wp-content/uploads/Image_fx-71-11.jpg" alt="Failfast Advances Speculative Decoding, Leveraging Diffusion LLMs For Efficient Parallel Generation"></p> <p><img src="x-raw-image:///41773bcd93c32e4578b5e4f06d1bc7ab227435e5dd1498adf86735960f49ad7b" alt="CDLM: Consistency Diffusion Language Models For Faster Sampling"></p> <p><img src="x-raw-image:///c935c24fd26d6151532fa3d232d07561aa0116e7ef8e824e7e3a417b0e415f42" alt="JUDGE DECODING: FASTER SPECULATIVE 
SAMPLING REQUIRES GOING BEYOND MODEL ALIGNMENT"></p> <p><img src="x-raw-image:///5cac39cff78feacdb51001c41d01ae9fc3a3a9c53b51a9d79725db5797d774e8" alt="Accelerating Diffusion LLMs via Adaptive Parallel Decoding"></p> <p><img src="https://neurips.cc/media/PosterPDFs/NeurIPS%202025/115194.png?t=1764639405.212518" alt="NeurIPS Poster Accelerating Diffusion LLMs via Adaptive Parallel Decoding"></p> <p><img src="https://csdl-images.ieeecomputer.org/trans/tp/2025/09/figures/zhou9-3569700.gif" alt="Efficient Diffusion Models: A Comprehensive Survey From Principles to Practices"></p> <p><img src="https://chatdoc-arxiv.oss-us-west-1.aliyuncs.com/images/arxiv/2509.13136/first_image.jpeg?AWSAccessKeyId=LTAI5t6b2G8eTtEBczAMwjhc&Signature=AYF2lsb7yyLgI41MP7ivW4knQdg%3D&Expires=9223372038618759168" alt="Discovering Mathematical Equations with Diffusion Language Model"></p> <p><img src="https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=4087527844852271" alt="Accelerating LLM inference with speculative decoding"></p> <p><img src="https://www.marktechpost.com/wp-content/uploads/2024/04/Screenshot-2024-04-20-at-1.37.08-PM.png" alt="Researchers at CMU Introduce TriForce: A Hierarchical Speculative Decoding AI System that is Scalable to Long Sequence Generation - MarkTechPost"></p> <p><img src="https://www.marktechpost.com/wp-content/uploads/2024/09/Screenshot-2024-09-02-at-7.57.08-AM.png" alt="The Mamba in the Llama: Accelerating Inference with Speculative Decoding - MarkTechPost"></p> <p><img src="https://www.mdpi.com/electronics/electronics-14-02188/article_deploy/html/images/electronics-14-02188-g001.png" alt="A Unified and Resource-Aware Framework for Adaptive Inference Acceleration on Edge and Embedded Platforms"></p> <p><img src="x-raw-image:///d87514d569091ccb3b571b4b4d3bc271829248c5c639959b400017be07dab4b9" alt="CSC 412: Probabilistic Learning and Reasoning - Week 12: Speculative Decoding & Diffusion Models"></p> <p><img 
src="https://csdl-images.ieeecomputer.org/trans/tp/2025/09/figures/zhou3-3569700.gif" alt="Efficient Diffusion Models: A Comprehensive Survey From Principles to Practices"></p> <p><img src="x-raw-image:///c14fdf3ad67704747defe8d12803ae05150c327f95c5e45513bfb2a5d0d1e89d" alt="Accelerating Diffusion LLMs via Adaptive Parallel Decoding"></p> <p><img src="x-raw-image:///3fbc698eb98538b7856603b553363895b43ab33f22e4e9ee6ed0b8f5145febb7" alt="JUDGE DECODING: FASTER SPECULATIVE SAMPLING REQUIRES GOING BEYOND MODEL ALIGNMENT"></p> <p><img src="https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=4385679115037141" alt="ReFusion: A Diffusion Large Language Model with Parallel Autoregressive Decoding (Renmin University of China & Ant Group, December 2025) Paper: [https://arxiv.org/abs/2512.13586](https://arxiv.org/abs/2512.13586) Abstract: "Autoregressive models (ARMs ..."></p> <p><img src="https://miro.medium.com/v2/resize:fit:1400/1*Z696gMLDIYb2qNitKqjPQg.png" alt="Important LLM Papers for the Week From 12/05 to 18/05 | by Youssef Hosni | Level Up Coding"></p> <p><img src="https://camo.githubusercontent.com/a7114be73ef37e53f46988ebb624eb3b18c6cb5a602e1b3c8f06c41a31cdc30d/68747470733a2f2f61727869762e6f72672f68746d6c2f323430322e313537353876312f78312e706e67" alt="Awesome-Efficient-LLM/inference_acceleration.md at main · horseee/Awesome-Efficient-LLM · GitHub"></p> <p><img src="https://developer-blogs.nvidia.com/wp-content/uploads/2024/12/speculative-decoding-workflow-1024x885.jpg" alt="TensorRT-LLM Speculative Decoding Boosts Inference Throughput by up to 3.6x | NVIDIA Technical Blog"></p> <p><img src="https://cdn.bytez.com/mobilePapers/v2/icml/45820/images/7-0.png" alt="ICML Poster Morse: Dual-Sampling for Lossless Acceleration of Diffusion Models"></p> <p><img src="x-raw-image:///0d43fca2122c114f86d23674d596ca9a9ec627ef4d7d20fdb2963a06babb1e6a" alt="Efficient Diffusion Models: A Comprehensive Survey from Principles to Practices"></p> <p><img 
src="https://csdl-images.ieeecomputer.org/trans/tp/2025/09/figures/zhou10-3569700.gif" alt="Efficient Diffusion Models: A Comprehensive Survey From Principles to Practices"></p> <p><img src="https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=3085073008297422" alt="Institute for AI Industry Research & ByteDance Seed have released Seed Diffusion Preview, a diffusion-based #LLM with code reasoning speeds of up to 2,146 tokens per second. This #InnovativeTsinghua achievement offers significant"></p> <p><img src="https://lmsys.org/images/blog/laattention/acc-demo.gif" alt="Blog | LMSYS Org"></p> <p><img src="https://substackcdn.com/image/fetch/$s_!Cx6I!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c7d1b1d-d482-4ce1-bd3b-f7399f67dadb_2026x1144.png" alt="🥇Top AI Papers of the Week - AI Newsletter"></p> <p><img src="x-raw-image:///c9dab01c3a8e3336a8fa7a44c775a162fd35fbd2671aa1f68ed8e4118e140f46" alt="Accelerated Diffusion Models via Speculative Sampling"></p> <p><img src="x-raw-image:///910cc1ebf6843ddf781f1624ea066692ef838d62f30d3d88bc4b54d670b4c26e" alt="Automatic Task Detection and Heterogeneous LLM Speculative Decoding"></p> <p><img src="https://miro.medium.com/v2/resize:fit:1358/format:webp/1*nL3_E5DHx6aLuFP2nxIvzw.png" alt="Speculative Decoding and Self-Speculative Decoding: Faster Approaches to Large Language Model Generation | by Isaac Kargar | Medium"></p> <p><img src="https://lh7-us.googleusercontent.com/fNgMN4fgY_0S1ayBE30bsYtKf_Ogex0RnWcVuH_zOOo8VqICS5pZNalBPNeavm18ccxPY8HsVA_BlX2rMx4IoeJoYOoMMK4jqnqUKmTNErXYTjFZc-pDC0v0YaNVKEduUvRAy72gaw9jdABlI3kgWg0" alt="This AI Paper Unveils the Potential of Speculative Decoding for Faster Large Language Model Inference: A Comprehensive Analysis - MarkTechPost"></p> <p><img 
src="https://substackcdn.com/image/fetch/$s_!449j!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7dcdd662-c04b-4d83-ac11-8c7840a45545_2482x1128.png" alt="🥇Top AI Papers of the Week - AI Newsletter"></p> <p><img src="x-raw-image:///2da8579bdfefb916ed251564e26d8e070191d13831eaadae1e8ef697f06939c2" alt="JUDGE DECODING: FASTER SPECULATIVE SAMPLING REQUIRES GOING BEYOND MODEL ALIGNMENT"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/specdiff-accelerating-diffusion-model-inference-with-self-speculation-1.png" alt="Literature Review] SpecDiff: Accelerating Diffusion Model Inference with Self-Speculation"></p> <p><img src="https://www.qualcomm.com/content/dam/qcomm-martech/dm-assets/images/blog/ai-product/full-stack-ai-optimization-for-lvm.jpg" alt="Edge AI LLM | Efficient On-Device Language | Qualcomm"></p> <p><img src="https://neurips.cc/media/PosterPDFs/NeurIPS%202024/106484.png?t=1733286317.8840184" alt="NeurIPS Speculative Diffusion Decoding for Accelerated Language Generation"></p> <p><img src="https://cdn-uploads.huggingface.co/production/uploads/65c2710dc79c1a6e4d22734d/kB7OTGVsC1DPMIV2femsf.png" alt="Daily Papers - Hugging Face"></p> <p><img src="https://www.preprints.org/frontend/picture/ms_xml/manuscript/9467916cff347f1f274406f41e2e95b1/preprints-145866-g001.png" alt="HeteroLLM: Accelerating Large Language Model Inference on Mobile SoCs with Heterogeneous AI Accelerators[v1] | Preprints.org"></p> <p><img src="https://arxiv.org/html/2503.09790v1/x1.png" alt="Papers by Jacob K Christopher"></p> <p><img src="x-raw-image:///9c7df1c7dc16bdb4990eb1d54204c3933cde454b016af151074ade2e25794b78" alt="Diffusion-based Large Language Models Survey"></p> <p><img src="https://csdl-images.ieeecomputer.org/trans/tp/2025/09/figures/zhou14-3569700.gif" alt="Efficient Diffusion Models: A Comprehensive Survey From Principles to Practices"></p> <p><img 
src="x-raw-image:///0041462281bd858dd55008efca3f8ac877af5c797998237255843a3944310723" alt="Accelerating Diffusion LLMs via Adaptive Parallel Decoding"></p> <p><img src="https://csdl-images.ieeecomputer.org/trans/tp/2025/09/figures/zhou8-3569700.gif" alt="Efficient Diffusion Models: A Comprehensive Survey From Principles to Practices"></p> <p><img src="https://miro.medium.com/v2/resize:fit:1400/1*WJCYUER0wVTnnGCpDw7zAA.png" alt="Speculative Decoding and Self-Speculative Decoding: Faster Approaches to Large Language Model Generation | by Isaac Kargar | Medium"></p> <p><img src="https://substackcdn.com/image/fetch/$s_!AASa!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F897ee242-463d-4046-866d-c5785c568214_2318x1256.png" alt="🥇Top AI Papers of the Week - AI Newsletter"></p> <p><img src="x-raw-image:///2f81d3a047b8b23f70f179a3dc64d041ea64b077c6dd185ce0fcad59d8fbb9b7" alt="CSC 412: Probabilistic Learning and Reasoning - Week 12: Speculative Decoding & Diffusion Models"></p> <p><img src="https://csdl-images.ieeecomputer.org/trans/tp/2025/09/figures/zhou5-3569700.gif" alt="Efficient Diffusion Models: A Comprehensive Survey From Principles to Practices"></p> <p><img src="https://cdn.bytez.com/mobilePapers/v2/icml/45820/images/12-0.png" alt="ICML Poster Morse: Dual-Sampling for Lossless Acceleration of Diffusion Models"></p> <p><img src="https://cdn-uploads.huggingface.co/production/uploads/630139f1f6bea7dd15bdaf4e/8S4ujHUYnDd6leasY69yj.png" alt="Paper page - Planned Diffusion"></p> <p><img src="x-raw-image:///ec03dfb055727f7c275269d9dc8eb126c1578c1d7d192b901ca7309354191368" alt="Speculative Diffusion Decoding: Accelerating Language Generation through Diffusion"></p> <p><img src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/diffusion-language-models-know-the-answer-before-decoding-4.png" alt="论文评述] Diffusion Language Models Know the Answer Before Decoding"></p> <p><img 
src="https://moonlight-paper-snapshot.s3.ap-northeast-2.amazonaws.com/arxiv/speculative-diffusion-decoding-accelerating-language-generation-through-diffusion-1.png" alt="Literature Review] Speculative Diffusion Decoding: Accelerating Language Generation through Diffusion"></p> <p><img src="https://neurips.cc/media/PosterPDFs/NeurIPS%202025/118825.png?t=1762517603.0974364" alt="NeurIPS Poster ASDSV: Multimodal Generation Made Efficient with Approximate Speculative Diffusion and Speculative Verification"></p> <p><img src="https://i.ytimg.com/vi/t3M3T21yY-Q/maxresdefault.jpg" alt="Hyper-Bagel: Accelerating Multimodal Models"></p> <p><img src="https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=10161062355081465" alt="Accelerating LLM inference with speculative decoding"></p> <p><img src="https://hao-ai-lab.github.io/img/objective_illustration_global.jpg" alt="Consistency Large Language Models: A Family of Efficient Parallel Decoders | Hao AI Lab @ UCSD"></p> <p><img src="https://www.marktechpost.com/wp-content/uploads/2023/09/Screenshot-2023-09-21-at-10.50.50-PM.png" alt="Researchers from UCI and Zhejiang University Introduce Lossless Large Language Model Acceleration via Self-Speculative Decoding Using Drafting And Verifying Stages - MarkTechPost"></p> <p><img src="x-raw-image:///776e83abc20428ce3a508e9e7ba3a3109cfba84d114c0a175bf39c4d05128284" alt="Accelerated Diffusion Models via Speculative Sampling"></p>]]></content:encoded>
</item></channel></rss>