From 1f7df36c86f900a33b502f53fa96ea5ff63b91a2 Mon Sep 17 00:00:00 2001
From: Kornel <kornel@geekhood.net>
Date: Sat, 13 Jan 2024 12:59:28 +0000
Subject: [PATCH 1/4] Unused

---
 src/reader/decoder.rs | 2 --
 src/reader/mod.rs     | 6 ------
 2 files changed, 8 deletions(-)
diff --git a/src/reader/decoder.rs b/src/reader/decoder.rs
index fa544a0..417cd50 100644
--- a/src/reader/decoder.rs
+++ b/src/reader/decoder.rs
@@ -359,8 +359,6 @@ impl StreamingDecoder {
         mut buf: &[u8],
         write_into: &mut OutputBuffer<'_>,
     ) -> Result<(usize, Decoded), DecodingError> {
-        // NOTE: Do not change the function signature without double-checking the
-        //       unsafe block!
         let len = buf.len();
         while !buf.is_empty() {
             let (bytes, decoded) = self.next_state(buf, write_into)?;
diff --git a/src/reader/mod.rs b/src/reader/mod.rs
index 1d0f7dd..e73b445 100644
--- a/src/reader/mod.rs
+++ b/src/reader/mod.rs
@@ -218,12 +218,6 @@ impl<R: Read> ReadDecoder<R> {
                     return Err(io::ErrorKind::UnexpectedEof.into());
                 }
 
-                // Dead code checks the lifetimes that the later mem::transmute can't.
-                #[cfg(test)]
-                if false {
-                    return self.decoder.update(buf, write_into).map(|(_, res)| Some(res));
-                }
-
                 self.decoder.update(buf, write_into)?
             };
             self.reader.consume(consumed);

From 1285484e8d10e5e73f1130acfe258d1febadc791 Mon Sep 17 00:00:00 2001
From: Kornel <kornel@geekhood.net>
Date: Sat, 13 Jan 2024 13:00:18 +0000
Subject: [PATCH 2/4] Inlining control

non-inline decode_next allows inlining all of its content
---
 src/reader/decoder.rs | 1 +
 src/reader/mod.rs     | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/src/reader/decoder.rs b/src/reader/decoder.rs
index 417cd50..aa0f551 100644
--- a/src/reader/decoder.rs
+++ b/src/reader/decoder.rs
@@ -414,6 +414,7 @@ impl StreamingDecoder {
         self.version
     }
 
+    #[inline]
     fn next_state(&mut self, buf: &[u8], write_into: &mut OutputBuffer<'_>) -> Result<(usize, Decoded), DecodingError> {
         macro_rules! goto (
             ($n:expr, $state:expr) => ({
diff --git a/src/reader/mod.rs b/src/reader/mod.rs
index e73b445..691fe49 100644
--- a/src/reader/mod.rs
+++ b/src/reader/mod.rs
@@ -118,6 +118,7 @@ impl Default for DecodeOptions {
 impl DecodeOptions {
     /// Creates a new decoder builder
     #[must_use]
+    #[inline]
     pub fn new() -> DecodeOptions {
         DecodeOptions {
             memory_limit: MemoryLimit::Bytes(50_000_000.try_into().unwrap()), // 50 MB
@@ -130,6 +131,7 @@ impl DecodeOptions {
     }
 
     /// Configure how color data is decoded.
+    #[inline]
     pub fn set_color_output(&mut self, color: ColorOutput) {
         self.color_output = color;
     }
@@ -210,6 +212,7 @@ struct ReadDecoder<R: Read> {
 }
 
 impl<R: Read> ReadDecoder<R> {
+    #[inline(never)]
     fn decode_next(&mut self, write_into: &mut OutputBuffer<'_>) -> Result<Option<Decoded>, DecodingError> {
         while !self.at_eof {
             let (consumed, result) = {
@@ -253,12 +256,14 @@ pub struct Decoder<R: Read> {
 
 impl<R> Decoder<R> where R: Read {
     /// Create a new decoder with default options.
+    #[inline]
     pub fn new(reader: R) -> Result<Self, DecodingError> {
         DecodeOptions::new().read_info(reader)
     }
 
     /// Return a builder that allows configuring limits etc.
     #[must_use]
+    #[inline]
     pub fn build() -> DecodeOptions {
         DecodeOptions::new()
     }
@@ -519,6 +524,7 @@ impl<R> Decoder<R> where R: Read {
     }
 
     /// Returns the color palette relevant for the frame that has been decoded
+    #[inline]
     pub fn palette(&self) -> Result<&[u8], DecodingError> {
         Ok(match self.current_frame.palette {
             Some(ref table) => table,
@@ -534,11 +540,13 @@ impl<R> Decoder<R> where R: Read {
     }
 
     /// Width of the image
+    #[inline]
     pub fn width(&self) -> u16 {
         self.decoder.decoder.width()
     }
 
     /// Height of the image
+    #[inline]
     pub fn height(&self) -> u16 {
         self.decoder.decoder.height()
     }
@@ -557,6 +565,7 @@ impl<R> Decoder<R> where R: Read {
     }
 
     /// Number of loop repetitions
+    #[inline]
     pub fn repeat(&self) -> Repeat {
         self.repeat
     }

From c5deb27d5e391aa14408791002a506ad5caab090 Mon Sep 17 00:00:00 2001
From: Kornel <kornel@geekhood.net>
Date: Thu, 11 Jan 2024 00:52:12 +0000
Subject: [PATCH 3/4] Move deinterlacing and RGBA conversions

---
 src/reader/converter.rs | 234 ++++++++++++++++++++++++++++++++++++++++
 src/reader/mod.rs       | 227 +++++---------------------------------
 2 files changed, 263 insertions(+), 198 deletions(-)
 create mode 100644 src/reader/converter.rs

diff --git a/src/reader/converter.rs b/src/reader/converter.rs
new file mode 100644
index 0000000..bf0ea13
--- /dev/null
+++ b/src/reader/converter.rs
@@ -0,0 +1,234 @@
+use std::borrow::Cow;
+use std::io;
+use std::mem;
+use std::iter;
+use crate::common::Frame;
+use crate::MemoryLimit;
+
+use super::decoder::{
+    PLTE_CHANNELS, DecodingError, OutputBuffer
+};
+
+pub(crate) const N_CHANNELS: usize = 4;
+
+/// Output mode for the image data
+#[derive(Clone, Copy, Debug, PartialEq)]
+#[repr(u8)]
+pub enum ColorOutput {
+    /// The decoder expands the image data to 32bit RGBA.
+    /// This affects:
+    ///
+    ///  - The buffer of the `Frame` returned by [`Decoder::read_next_frame`].
+    ///  - `Decoder::fill_buffer`, `Decoder::buffer_size` and `Decoder::line_length`.
+    RGBA = 0,
+    /// The decoder returns the raw indexed data.
+    Indexed = 1,
+}
+
+pub(crate) type FillBufferCallback<'a> = &'a mut dyn FnMut(&mut OutputBuffer<'_>) -> Result<usize, DecodingError>;
+
+/// Deinterlaces and expands to RGBA if needed
+pub(crate) struct PixelConverter {
+    memory_limit: MemoryLimit,
+    color_output: ColorOutput,
+    buffer: Vec<u8>,
+    global_palette: Option<Vec<u8>>,
+}
+
+impl PixelConverter {
+    pub(crate) fn new(color_output: ColorOutput, memory_limit: MemoryLimit) -> Self {
+        Self {
+            memory_limit,
+            color_output,
+            buffer: Vec::new(),
+            global_palette: None,
+        }
+    }
+
+    pub(crate) fn read_frame(&mut self, frame: &mut Frame<'_>, data_callback: FillBufferCallback<'_>) -> Result<(), DecodingError> {
+        let pixel_bytes = self.memory_limit
+            .buffer_size(self.color_output, frame.width, frame.height)
+            .ok_or_else(|| io::Error::new(io::ErrorKind::OutOfMemory, "image is too large"))?;
+
+        debug_assert_eq!(
+            pixel_bytes, self.buffer_size(frame),
+            "Checked computation diverges from required buffer size"
+        );
+
+        let mut vec = match mem::replace(&mut frame.buffer, Cow::Borrowed(&[])) {
+            // reuse buffer if possible without reallocating
+            Cow::Owned(mut vec) if vec.capacity() >= pixel_bytes => {
+                vec.resize(pixel_bytes, 0);
+                vec
+            },
+            // resizing would realloc anyway, and 0-init is faster than a copy
+            _ => vec![0; pixel_bytes],
+        };
+        self.read_into_buffer(frame, &mut vec, data_callback)?;
+        frame.buffer = Cow::Owned(vec);
+        frame.interlaced = false;
+        Ok(())
+    }
+
+    pub(crate) fn buffer_size(&self, frame: &Frame<'_>) -> usize {
+        self.line_length(frame) * frame.height as usize
+    }
+
+    pub(crate) fn line_length(&self, frame: &Frame<'_>) -> usize {
+        use self::ColorOutput::*;
+        match self.color_output {
+            RGBA => frame.width as usize * N_CHANNELS,
+            Indexed => frame.width as usize,
+        }
+    }
+
+    pub(crate) fn fill_buffer(&mut self, current_frame: &mut Frame<'_>, mut buf: &mut [u8], data_callback: FillBufferCallback<'_>) -> Result<bool, DecodingError> {
+        loop {
+            let decode_into = match self.color_output {
+                // When decoding indexed data, LZW can write the pixels directly
+                ColorOutput::Indexed => &mut buf[..],
+                // When decoding RGBA, the pixel data will be expanded by a factor of 4,
+                // and it's simpler to decode indexed pixels to another buffer first
+                ColorOutput::RGBA => {
+                    let buffer_size = buf.len() / N_CHANNELS;
+                    if buffer_size == 0 {
+                        return Err(DecodingError::format("odd-sized buffer"));
+                    }
+                    if self.buffer.len() < buffer_size {
+                        self.buffer.resize(buffer_size, 0);
+                    }
+                    &mut self.buffer[..buffer_size]
+                }
+            };
+            match data_callback(&mut OutputBuffer::Slice(decode_into))? {
+                0 => return Ok(false),
+                bytes_decoded => {
+                    match self.color_output {
+                        ColorOutput::RGBA => {
+                            let transparent = current_frame.transparent;
+                            let palette: &[u8] = current_frame.palette.as_deref()
+                                .or(self.global_palette.as_deref())
+                                .unwrap_or_default(); // next_frame_info already checked it won't happen
+
+                            let (pixels, rest) = buf.split_at_mut(bytes_decoded * N_CHANNELS);
+                            buf = rest;
+
+                            for (rgba, idx) in pixels.chunks_exact_mut(N_CHANNELS).zip(self.buffer.iter().copied().take(bytes_decoded)) {
+                                let plte_offset = PLTE_CHANNELS * idx as usize;
+                                if let Some(colors) = palette.get(plte_offset..plte_offset+PLTE_CHANNELS) {
+                                    rgba[0] = colors[0];
+                                    rgba[1] = colors[1];
+                                    rgba[2] = colors[2];
+                                    rgba[3] = if let Some(t) = transparent {
+                                        if t == idx { 0x00 } else { 0xFF }
+                                    } else {
+                                        0xFF
+                                    };
+                                }
+                            }
+                        },
+                        ColorOutput::Indexed => {
+                            buf = &mut buf[bytes_decoded..];
+                        }
+                    }
+                    if buf.is_empty() {
+                        return Ok(true);
+                    }
+                },
+            }
+        }
+    }
+
+    pub(crate) fn global_palette(&self) -> Option<&[u8]> {
+        self.global_palette.as_deref()
+    }
+
+    pub(crate) fn set_global_palette(&mut self, palette: Vec<u8>) {
+        self.global_palette = if !palette.is_empty() {
+            Some(palette)
+        } else {
+            None
+        };
+    }
+
+    pub(crate) fn read_into_buffer(&mut self, frame: &mut Frame<'_>, buf: &mut [u8], data_callback: FillBufferCallback<'_>) -> Result<(), DecodingError> {
+        if frame.interlaced {
+            let width = self.line_length(frame);
+            let height = frame.height as usize;
+            for row in (InterlaceIterator { len: height, next: 0, pass: 0 }) {
+                let start = row * width;
+                // Handle a too-small buffer without panicking
+                let line = buf.get_mut(start .. start + width).ok_or_else(|| DecodingError::format("buffer too small"))?;
+                if !self.fill_buffer(frame, line, data_callback)? {
+                    return Err(DecodingError::format("image truncated"));
+                }
+            }
+        } else {
+            let buf = buf.get_mut(..self.buffer_size(frame)).ok_or_else(|| DecodingError::format("buffer too small"))?;
+            if !self.fill_buffer(frame, buf, data_callback)? {
+                return Err(DecodingError::format("image truncated"));
+            }
+        };
+        Ok(())
+    }
+}
+
+struct InterlaceIterator {
+    len: usize,
+    next: usize,
+    pass: usize,
+}
+
+impl iter::Iterator for InterlaceIterator {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.len == 0 {
+            return None;
+        }
+        // although the pass never goes out of bounds thanks to len==0,
+        // the optimizer doesn't see it. get()? avoids costlier panicking code.
+        let mut next = self.next + *[8, 8, 4, 2].get(self.pass)?;
+        while next >= self.len {
+            debug_assert!(self.pass < 4);
+            next = *[4, 2, 1, 0].get(self.pass)?;
+            self.pass += 1;
+        }
+        mem::swap(&mut next, &mut self.next);
+        Some(next)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::InterlaceIterator;
+
+    #[test]
+    fn test_interlace_iterator() {
+        for &(len, expect) in &[
+            (0, &[][..]),
+            (1, &[0][..]),
+            (2, &[0, 1][..]),
+            (3, &[0, 2, 1][..]),
+            (4, &[0, 2, 1, 3][..]),
+            (5, &[0, 4, 2, 1, 3][..]),
+            (6, &[0, 4, 2, 1, 3, 5][..]),
+            (7, &[0, 4, 2, 6, 1, 3, 5][..]),
+            (8, &[0, 4, 2, 6, 1, 3, 5, 7][..]),
+            (9, &[0, 8, 4, 2, 6, 1, 3, 5, 7][..]),
+            (10, &[0, 8, 4, 2, 6, 1, 3, 5, 7, 9][..]),
+            (11, &[0, 8, 4, 2, 6, 10, 1, 3, 5, 7, 9][..]),
+            (12, &[0, 8, 4, 2, 6, 10, 1, 3, 5, 7, 9, 11][..]),
+            (13, &[0, 8, 4, 12, 2, 6, 10, 1, 3, 5, 7, 9, 11][..]),
+            (14, &[0, 8, 4, 12, 2, 6, 10, 1, 3, 5, 7, 9, 11, 13][..]),
+            (15, &[0, 8, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13][..]),
+            (16, &[0, 8, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13, 15][..]),
+            (17, &[0, 8, 16, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13, 15][..]),
+        ] {
+            let iter = InterlaceIterator { len, next: 0, pass: 0 };
+            let lines = iter.collect::<Vec<_>>();
+            assert_eq!(lines, expect);
+        }
+    }
+}
diff --git a/src/reader/mod.rs b/src/reader/mod.rs
index 691fe49..5e06a17 100644
--- a/src/reader/mod.rs
+++ b/src/reader/mod.rs
@@ -2,7 +2,7 @@ use std::borrow::Cow;
 use std::io;
 use std::iter::FusedIterator;
 use std::mem;
-use std::iter;
+
 use std::io::prelude::*;
 use std::num::NonZeroU64;
 use std::convert::{TryFrom, TryInto};
@@ -11,26 +11,15 @@ use crate::Repeat;
 use crate::common::{Block, Frame};
 
 mod decoder;
+mod converter;
+
 pub use self::decoder::{
     PLTE_CHANNELS, StreamingDecoder, Decoded, DecodingError, DecodingFormatError,
     Version, FrameDataType, OutputBuffer
 };
 
-const N_CHANNELS: usize = 4;
-
-/// Output mode for the image data
-#[derive(Clone, Copy, Debug, PartialEq)]
-#[repr(u8)]
-pub enum ColorOutput {
-    /// The decoder expands the image data to 32bit RGBA.
-    /// This affects:
-    ///
-    ///  - The buffer buffer of the `Frame` returned by [`Decoder::read_next_frame`].
-    ///  - `Decoder::fill_buffer`, `Decoder::buffer_size` and `Decoder::line_length`.
-    RGBA = 0,
-    /// The decoder returns the raw indexed data.
-    Indexed = 1,
-}
+use self::converter::PixelConverter;
+pub use self::converter::ColorOutput;
 
 #[derive(Clone, Debug)]
 /// The maximum amount of memory the decoder is allowed to use for each frame
@@ -238,20 +227,25 @@ impl<R: Read> ReadDecoder<R> {
     fn into_inner(self) -> io::BufReader<R> {
         self.reader
     }
+
+    fn decode_next_bytes(&mut self, out: &mut OutputBuffer<'_>) -> Result<usize, DecodingError> {
+        match self.decode_next(out)? {
+            Some(Decoded::BytesDecoded(len)) => return Ok(len.get()),
+            Some(Decoded::DataEnd) => return Ok(0),
+            _ => return Err(DecodingError::format("unexpected data")),
+        }
+    }
 }
 
 #[allow(dead_code)]
 /// GIF decoder. Create [`DecodeOptions`] to get started, and call [`DecodeOptions::read_info`].
 pub struct Decoder<R: Read> {
     decoder: ReadDecoder<R>,
-    color_output: ColorOutput,
-    memory_limit: MemoryLimit,
+    pixel_converter: PixelConverter,
     bg_color: Option<u8>,
     repeat: Repeat,
-    global_palette: Option<Vec<u8>>,
     current_frame: Frame<'static>,
     current_frame_data_type: FrameDataType,
-    buffer: Vec<u8>,
 }
 
 impl<R> Decoder<R> where R: Read {
@@ -276,11 +270,8 @@ impl<R> Decoder<R> where R: Read {
                 at_eof: false,
             },
             bg_color: None,
-            global_palette: None,
-            buffer: vec![],
+            pixel_converter: PixelConverter::new(options.color_output, options.memory_limit),
             repeat: Repeat::default(),
-            color_output: options.color_output,
-            memory_limit: options.memory_limit,
             current_frame: Frame::default(),
             current_frame_data_type: FrameDataType::Pixels,
         }
@@ -293,11 +284,7 @@ impl<R> Decoder<R> where R: Read {
                     self.bg_color = Some(bg_color);
                 }
                 Some(Decoded::GlobalPalette(palette)) => {
-                    self.global_palette = if !palette.is_empty() {
-                        Some(palette.into())
-                    } else {
-                        None
-                    };
+                    self.pixel_converter.set_global_palette(palette.into());
                 },
                 Some(Decoded::Repetitions(repeat)) => {
                     self.repeat = repeat;
@@ -315,7 +302,7 @@ impl<R> Decoder<R> where R: Read {
             }
         }
         // If the background color is invalid, ignore it
-        if let Some(ref palette) = self.global_palette {
+        if let Some(palette) = self.pixel_converter.global_palette() {
             if self.bg_color.unwrap_or(0) as usize >= (palette.len() / PLTE_CHANNELS) {
                 self.bg_color = None;
             }
@@ -330,7 +317,7 @@ impl<R> Decoder<R> where R: Read {
                 Some(Decoded::FrameMetadata(frame_data_type)) => {
                     self.current_frame = self.decoder.decoder.current_frame_mut().take();
                     self.current_frame_data_type = frame_data_type;
-                    if self.current_frame.palette.is_none() && self.global_palette.is_none() {
+                    if self.current_frame.palette.is_none() && self.global_palette().is_none() {
                         return Err(DecodingError::format(
                             "no color table available for current frame",
                         ));
@@ -351,31 +338,11 @@ impl<R> Decoder<R> where R: Read {
     ///
     /// You can also call `.into_iter()` on the decoder to use it as a regular iterator.
     pub fn read_next_frame(&mut self) -> Result<Option<&Frame<'static>>, DecodingError> {
-        if let Some(frame) = self.next_frame_info()? {
-            let (width, height) = (frame.width, frame.height);
-            let pixel_bytes = self.memory_limit
-                .buffer_size(self.color_output, width, height)
-                .ok_or_else(|| io::Error::new(io::ErrorKind::OutOfMemory, "image is too large"))?;
-
-            debug_assert_eq!(
-                pixel_bytes, self.buffer_size(),
-                "Checked computation diverges from required buffer size"
-            );
+        if let Some(_) = self.next_frame_info()? {
             match self.current_frame_data_type {
                 FrameDataType::Pixels => {
-                    let mut vec = match mem::replace(&mut self.current_frame.buffer, Cow::Borrowed(&[])) {
-                        // reuse buffer if possible without reallocating
-                        Cow::Owned(mut vec) if vec.capacity() >= pixel_bytes => {
-                            vec.resize(pixel_bytes, 0);
-                            vec
-                        },
-                        // resizing would realloc anyway, and 0-init is faster than a copy
-                        _ => vec![0; pixel_bytes],
-                    };
-                    self.read_into_buffer(&mut vec)?;
-                    self.current_frame.buffer = Cow::Owned(vec);
-                    self.current_frame.interlaced = false;
-                }
+                    self.pixel_converter.read_frame(&mut self.current_frame, &mut |out| self.decoder.decode_next_bytes(out))?;
+                },
                 FrameDataType::Lzw { min_code_size } => {
                     let mut vec = if matches!(self.current_frame.buffer, Cow::Owned(_)) {
                         let mut vec = mem::replace(&mut self.current_frame.buffer, Cow::Borrowed(&[])).into_owned();
@@ -385,7 +352,7 @@ impl<R> Decoder<R> where R: Read {
                         Vec::new()
                     };
                     // Guesstimate 2bpp
-                    vec.try_reserve(usize::from(width) * usize::from(height) / 4)
+                    vec.try_reserve(usize::from(self.current_frame.width) * usize::from(self.current_frame.height) / 4)
                         .map_err(|_| io::Error::from(io::ErrorKind::OutOfMemory))?;
                     self.copy_lzw_into_buffer(min_code_size, &mut vec)?;
                     self.current_frame.buffer = Cow::Owned(vec);
@@ -411,24 +378,7 @@ impl<R> Decoder<R> where R: Read {
     /// The length of `buf` must be at least `Self::buffer_size`.
     /// Deinterlaces the result.
     pub fn read_into_buffer(&mut self, buf: &mut [u8]) -> Result<(), DecodingError> {
-        if self.current_frame.interlaced {
-            let width = self.line_length();
-            let height = self.current_frame.height as usize;
-            for row in (InterlaceIterator { len: height, next: 0, pass: 0 }) {
-                let start = row * width;
-                // Handle a too-small buffer without panicking
-                let line = buf.get_mut(start .. start + width).ok_or_else(|| DecodingError::format("buffer too small"))?;
-                if !self.fill_buffer(line)? {
-                    return Err(DecodingError::format("image truncated"));
-                }
-            }
-        } else {
-            let buf = buf.get_mut(..self.buffer_size()).ok_or_else(|| DecodingError::format("buffer too small"))?;
-            if !self.fill_buffer(buf)? {
-                return Err(DecodingError::format("image truncated"));
-            }
-        };
-        Ok(())
+        self.pixel_converter.read_into_buffer(&mut self.current_frame, buf, &mut |out| self.decoder.decode_next_bytes(out))
     }
 
     fn copy_lzw_into_buffer(&mut self, min_code_size: u8, buf: &mut Vec<u8>) -> Result<(), DecodingError> {
@@ -450,77 +400,18 @@ impl<R> Decoder<R> where R: Read {
     ///
     /// `Self::next_frame_info` needs to be called beforehand. Returns `true` if the supplied
     /// buffer could be filled completely. Should not be called after `false` had been returned.
-    pub fn fill_buffer(&mut self, mut buf: &mut [u8]) -> Result<bool, DecodingError> {
-        loop {
-            let decode_into = match self.color_output {
-                // When decoding indexed data, LZW can write the pixels directly
-                ColorOutput::Indexed => &mut buf[..],
-                // When decoding RGBA, the pixel data will be expanded by a factor of 4,
-                // and it's simpler to decode indexed pixels to another buffer first
-                ColorOutput::RGBA => {
-                    let buffer_size = buf.len() / N_CHANNELS;
-                    if buffer_size == 0 {
-                        return Err(DecodingError::format("odd-sized buffer"));
-                    }
-                    if self.buffer.len() < buffer_size {
-                        self.buffer.resize(buffer_size, 0);
-                    }
-                    &mut self.buffer[..buffer_size]
-                }
-            };
-            match self.decoder.decode_next(&mut OutputBuffer::Slice(decode_into))? {
-                Some(Decoded::BytesDecoded(bytes_decoded)) => {
-                    let bytes_decoded = bytes_decoded.get();
-                    match self.color_output {
-                        ColorOutput::RGBA => {
-                            let transparent = self.current_frame.transparent;
-                            let palette: &[u8] = self.current_frame.palette.as_deref()
-                                .or(self.global_palette.as_deref())
-                                .unwrap_or_default(); // next_frame_info already checked it won't happen
-
-                            let (pixels, rest) = buf.split_at_mut(bytes_decoded * N_CHANNELS);
-                            buf = rest;
-
-                            for (rgba, idx) in pixels.chunks_exact_mut(N_CHANNELS).zip(self.buffer.iter().copied().take(bytes_decoded)) {
-                                let plte_offset = PLTE_CHANNELS * idx as usize;
-                                if let Some(colors) = palette.get(plte_offset..plte_offset+PLTE_CHANNELS) {
-                                    rgba[0] = colors[0];
-                                    rgba[1] = colors[1];
-                                    rgba[2] = colors[2];
-                                    rgba[3] = if let Some(t) = transparent {
-                                        if t == idx { 0x00 } else { 0xFF }
-                                    } else {
-                                        0xFF
-                                    };
-                                }
-                            }
-                        },
-                        ColorOutput::Indexed => {
-                            buf = &mut buf[bytes_decoded..];
-                        }
-                    }
-                    if buf.is_empty() {
-                        return Ok(true);
-                    }
-                }
-                Some(_) => return Ok(false), // make sure that no important result is missed
-                None => return Ok(false),
-            }
-        }
+    pub fn fill_buffer(&mut self, buf: &mut [u8]) -> Result<bool, DecodingError> {
+        self.pixel_converter.fill_buffer(&mut self.current_frame, buf, &mut |out| self.decoder.decode_next_bytes(out))
     }
 
     /// Output buffer size
     pub fn buffer_size(&self) -> usize {
-        self.line_length() * self.current_frame.height as usize
+        self.pixel_converter.buffer_size(&self.current_frame)
     }
 
     /// Line length of the current frame
     pub fn line_length(&self) -> usize {
-        use self::ColorOutput::*;
-        match self.color_output {
-            RGBA => self.current_frame.width as usize * N_CHANNELS,
-            Indexed => self.current_frame.width as usize,
-        }
+        self.pixel_converter.line_length(&self.current_frame)
     }
 
     /// Returns the color palette relevant for the frame that has been decoded
@@ -528,7 +419,7 @@ impl<R> Decoder<R> where R: Read {
     pub fn palette(&self) -> Result<&[u8], DecodingError> {
         Ok(match self.current_frame.palette {
             Some(ref table) => table,
-            None => self.global_palette.as_ref().ok_or(DecodingError::format(
+            None => self.global_palette().ok_or(DecodingError::format(
                 "no color table available for current frame",
             ))?,
         })
@@ -536,7 +427,7 @@ impl<R> Decoder<R> where R: Read {
 
     /// The global color palette
     pub fn global_palette(&self) -> Option<&[u8]> {
-        self.global_palette.as_deref()
+        self.pixel_converter.global_palette()
     }
 
     /// Width of the image
@@ -610,63 +501,3 @@ impl<R: Read> Iterator for DecoderIter<R> {
         }
     }
 }
-
-struct InterlaceIterator {
-    len: usize,
-    next: usize,
-    pass: usize,
-}
-
-impl iter::Iterator for InterlaceIterator {
-    type Item = usize;
-
-    #[inline]
-    fn next(&mut self) -> Option<Self::Item> {
-        if self.len == 0 {
-            return None;
-        }
-        // although the pass never goes out of bounds thanks to len==0,
-        // the optimizer doesn't see it. get()? avoids costlier panicking code.
-        let mut next = self.next + *[8, 8, 4, 2].get(self.pass)?;
-        while next >= self.len {
-            debug_assert!(self.pass < 4);
-            next = *[4, 2, 1, 0].get(self.pass)?;
-            self.pass += 1;
-        }
-        mem::swap(&mut next, &mut self.next);
-        Some(next)
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use super::InterlaceIterator;
-
-    #[test]
-    fn test_interlace_iterator() {
-        for &(len, expect) in &[
-            (0, &[][..]),
-            (1, &[0][..]),
-            (2, &[0, 1][..]),
-            (3, &[0, 2, 1][..]),
-            (4, &[0, 2, 1, 3][..]),
-            (5, &[0, 4, 2, 1, 3][..]),
-            (6, &[0, 4, 2, 1, 3, 5][..]),
-            (7, &[0, 4, 2, 6, 1, 3, 5][..]),
-            (8, &[0, 4, 2, 6, 1, 3, 5, 7][..]),
-            (9, &[0, 8, 4, 2, 6, 1, 3, 5, 7][..]),
-            (10, &[0, 8, 4, 2, 6, 1, 3, 5, 7, 9][..]),
-            (11, &[0, 8, 4, 2, 6, 10, 1, 3, 5, 7, 9][..]),
-            (12, &[0, 8, 4, 2, 6, 10, 1, 3, 5, 7, 9, 11][..]),
-            (13, &[0, 8, 4, 12, 2, 6, 10, 1, 3, 5, 7, 9, 11][..]),
-            (14, &[0, 8, 4, 12, 2, 6, 10, 1, 3, 5, 7, 9, 11, 13][..]),
-            (15, &[0, 8, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13][..]),
-            (16, &[0, 8, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13, 15][..]),
-            (17, &[0, 8, 16, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13, 15][..]),
-        ] {
-            let iter = InterlaceIterator { len, next: 0, pass: 0 };
-            let lines = iter.collect::<Vec<_>>();
-            assert_eq!(lines, expect);
-        }
-    }
-}

From 8bd439cfd8bc2df68ed4757e9ba9d291f5a7561e Mon Sep 17 00:00:00 2001
From: Kornel <kornel@geekhood.net>
Date: Thu, 11 Jan 2024 01:12:52 +0000
Subject: [PATCH 4/4] API to undo make_lzw_pre_encoded

---
 Cargo.toml              |  1 +
 Changes.md              |  2 ++
 examples/parallel.rs    | 75 +++++++++++++++++++++++++++++++++++++++++
 src/lib.rs              |  2 +-
 src/reader/converter.rs | 16 +++++++--
 src/reader/decoder.rs   | 73 +++++++++++++++++++++++++++++++++++++--
 src/reader/mod.rs       |  8 ++---
 7 files changed, 166 insertions(+), 11 deletions(-)
 create mode 100644 examples/parallel.rs

diff --git a/Cargo.toml b/Cargo.toml
index 71b1ac4..030d393 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -22,6 +22,7 @@ color_quant = { version = "1.1", optional = true }
 glob = "0.3"
 criterion = "0.5.1"
 png = "0.17.10"
+rayon = "1.8.0" # for parallel reencoding example
 
 [features]
 default = ["raii_no_panic", "std", "color_quant"]
diff --git a/Changes.md b/Changes.md
index a62c530..ff67e81 100644
--- a/Changes.md
+++ b/Changes.md
@@ -7,6 +7,8 @@ Features:
    It works together with `write_lzw_pre_encoded_frame` for quick rewriting of GIF files.
  - Added support pre-allocated `Vec`s in `from_palette_pixels`
  - Added ability to recover the `io::Read`er after decoding.
+ - Added support for decompressing `Frame.buffer` with LZW data,
+   which enables fully parallel GIF re-encoding (see examples/parallel.rs).
 
 Optimization:
  - Less buffering, copying, and lower peak memory usage.
diff --git a/examples/parallel.rs b/examples/parallel.rs
new file mode 100644
index 0000000..893368c
--- /dev/null
+++ b/examples/parallel.rs
@@ -0,0 +1,75 @@
+//! Reencodes GIF in parallel
+
+use gif::streaming_decoder::FrameDecoder;
+use gif::DecodeOptions;
+use rayon::iter::ParallelBridge;
+use rayon::iter::ParallelIterator;
+use std::env;
+use std::fs::File;
+use std::io::BufWriter;
+use std::path::PathBuf;
+
+/// Re-encodes the GIF given as the first argument into `<file stem>-reencoded.gif`.
+///
+/// Frames are LZW-decoded and re-compressed on rayon's thread pool, then written in order.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let input_path = PathBuf::from(
+        env::args_os()
+            .nth(1)
+            .ok_or("Specify a GIF path as the first argument")?,
+    );
+
+    let input = std::fs::read(&input_path)?;
+    let input_size = input.len();
+
+    let start = std::time::Instant::now();
+
+    let mut options = DecodeOptions::new();
+    options.skip_frame_decoding(true); // This gives LZW frames
+
+    let decoder = options.read_info(std::io::Cursor::new(input))?;
+    let repeat = decoder.repeat();
+    let screen_width = decoder.width();
+    let screen_height = decoder.height();
+    let global_pal = decoder.global_palette().unwrap_or_default().to_vec();
+
+    let mut output_file = input_path.file_stem().ok_or("Input path has no file name")?.to_os_string();
+    output_file.push("-reencoded.gif"); // an `OsString` keeps non-UTF-8 file names working
+    let output = BufWriter::new(File::create(output_file)?);
+    let mut encoder = gif::Encoder::new(output, screen_width, screen_height, &global_pal)?;
+    encoder.set_repeat(repeat)?;
+
+    let (send, recv) = std::sync::mpsc::channel();
+
+    decoder.into_iter().enumerate().par_bridge().try_for_each(move |(frame_number, frame)| {
+        let mut frame = frame?;
+        FrameDecoder::new(DecodeOptions::new()).decode_lzw_encoded_frame(&mut frame)?;
+        // frame is now pixels (invalid LZW data is returned as an error rather than a panic)
+        frame.make_lzw_pre_encoded();
+        // frame is now LZW again, re-encoded
+        send.send((frame_number, frame)).expect("receiver outlives all workers");
+        Ok::<_, gif::DecodingError>(())
+    })?;
+
+    // Decoding and encoding can happen in parallel, but writing to the GIF file is sequential
+    let mut next_frame_number = 0;
+    let mut frames_to_process = Vec::new();
+    for (frame_number, frame) in recv {
+        // frames can arrive in any order, since they're processed in parallel,
+        // so they have to be stored in a queue
+        frames_to_process.push((frame_number, frame));
+        while let Some(index) = frames_to_process.iter().position(|&(num, _)| num == next_frame_number) {
+            let frame = frames_to_process.remove(index).1;
+            encoder.write_lzw_pre_encoded_frame(&frame)?;
+            next_frame_number += 1;
+        }
+    }
+    encoder.into_inner()?;
+
+    let seconds = start.elapsed().as_secs_f64();
+    // Divide in floating point: integer division rounds the size down to whole MiB (0 for small files)
+    let rate = input_size as f64 / (1024. * 1024.) / seconds;
+
+    eprintln!("Finished in {seconds:0.2}s, {rate:0.0}MiB/s {}", if cfg!(debug_assertions) { ". Run with --release for more speed." } else { "" });
+    Ok(())
+}
diff --git a/src/lib.rs b/src/lib.rs
index 1a845e7..b883669 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -128,7 +128,7 @@ pub use crate::encoder::{Encoder, ExtensionData, Repeat, EncodingError};
 /// Low-level, advanced decoder. Prefer [`Decoder`] instead, which can stream frames too.
 pub mod streaming_decoder {
     pub use crate::common::Block;
-    pub use crate::reader::{StreamingDecoder, OutputBuffer, Decoded, FrameDataType};
+    pub use crate::reader::{Decoded, FrameDataType, FrameDecoder, OutputBuffer, StreamingDecoder};
 }
 
 macro_rules! insert_as_doc {
diff --git a/src/reader/converter.rs b/src/reader/converter.rs
index bf0ea13..f6183f1 100644
--- a/src/reader/converter.rs
+++ b/src/reader/converter.rs
@@ -45,7 +45,7 @@ impl PixelConverter {
         }
     }
 
-    pub(crate) fn read_frame(&mut self, frame: &mut Frame<'_>, data_callback: FillBufferCallback<'_>) -> Result<(), DecodingError> {
+    pub(crate) fn check_buffer_size(&mut self, frame: &Frame<'_>) -> Result<usize, DecodingError> {
         let pixel_bytes = self.memory_limit
             .buffer_size(self.color_output, frame.width, frame.height)
             .ok_or_else(|| io::Error::new(io::ErrorKind::OutOfMemory, "image is too large"))?;
@@ -54,7 +54,12 @@ impl PixelConverter {
             pixel_bytes, self.buffer_size(frame),
             "Checked computation diverges from required buffer size"
         );
+        Ok(pixel_bytes)
+    }
 
+    #[inline]
+    pub(crate) fn read_frame(&mut self, frame: &mut Frame<'_>, data_callback: FillBufferCallback<'_>) -> Result<(), DecodingError> {
+        let pixel_bytes = self.check_buffer_size(frame)?;
         let mut vec = match mem::replace(&mut frame.buffer, Cow::Borrowed(&[])) {
             // reuse buffer if possible without reallocating
             Cow::Owned(mut vec) if vec.capacity() >= pixel_bytes => {
@@ -82,7 +87,9 @@ impl PixelConverter {
         }
     }
 
-    pub(crate) fn fill_buffer(&mut self, current_frame: &mut Frame<'_>, mut buf: &mut [u8], data_callback: FillBufferCallback<'_>) -> Result<bool, DecodingError> {
+    /// Use `read_into_buffer` to deinterlace
+    #[inline(never)]
+    pub(crate) fn fill_buffer(&mut self, current_frame: &Frame<'_>, mut buf: &mut [u8], data_callback: FillBufferCallback<'_>) -> Result<bool, DecodingError> {
         loop {
             let decode_into = match self.color_output {
                 // When decoding indexed data, LZW can write the pixels directly
@@ -151,7 +158,10 @@ impl PixelConverter {
         };
     }
 
-    pub(crate) fn read_into_buffer(&mut self, frame: &mut Frame<'_>, buf: &mut [u8], data_callback: FillBufferCallback<'_>) -> Result<(), DecodingError> {
+    /// Applies deinterlacing
+    ///
+    /// Set `frame.interlaced = false` afterwards if you're putting the buffer back into the `Frame`
+    pub(crate) fn read_into_buffer(&mut self, frame: &Frame<'_>, buf: &mut [u8], data_callback: FillBufferCallback<'_>) -> Result<(), DecodingError> {
         if frame.interlaced {
             let width = self.line_length(frame);
             let height = frame.height as usize;
diff --git a/src/reader/decoder.rs b/src/reader/decoder.rs
index aa0f551..1020dbd 100644
--- a/src/reader/decoder.rs
+++ b/src/reader/decoder.rs
@@ -1,3 +1,4 @@
+use std::borrow::Cow;
 use std::cmp;
 use std::error;
 use std::fmt;
@@ -95,7 +96,7 @@ impl From<DecodingFormatError> for DecodingError {
     }
 }
 
-/// Varies depending on skip_frame_decoding
+/// Varies depending on `skip_frame_decoding`
 #[derive(Debug, Copy, Clone)]
 pub enum FrameDataType {
     /// `Frame.buffer` will be regular pixel data
@@ -178,6 +179,8 @@ enum State {
 }
 use self::State::*;
 
+use super::converter::PixelConverter;
+
 /// U16 values that may occur in a GIF image
 #[derive(Debug, Copy, Clone)]
 enum U16Value {
@@ -209,6 +212,69 @@ enum ByteValue {
     CodeSize,
 }
 
+/// Decodes frames whose `Frame.buffer` holds LZW data, i.e. undoes `Frame::make_lzw_pre_encoded`
+pub struct FrameDecoder {
+    lzw_reader: LzwReader,
+    pixel_converter: PixelConverter,
+}
+
+impl FrameDecoder {
+    /// Creates a decoder configured by `options`. See also [`Self::set_global_palette`]
+    #[inline]
+    #[must_use]
+    pub fn new(options: DecodeOptions) -> Self {
+        Self {
+            lzw_reader: LzwReader::new(options.check_for_end_code),
+            pixel_converter: PixelConverter::new(options.color_output, options.memory_limit),
+        }
+    }
+
+    /// Palette used for RGBA conversion
+    #[inline]
+    pub fn set_global_palette(&mut self, palette: Vec<u8>) {
+        self.pixel_converter.set_global_palette(palette);
+    }
+
+    /// Converts the frame in-place, replacing its LZW buffer with pixels and clearing `frame.interlaced`.
+    ///
+    /// If you get an error about invalid min code size, the buffer was probably pixels, not compressed data.
+    #[inline]
+    pub fn decode_lzw_encoded_frame(&mut self, frame: &mut Frame<'_>) -> Result<(), DecodingError> {
+        let pixel_bytes = self.pixel_converter.check_buffer_size(frame)?;
+        let mut vec = vec![0; pixel_bytes];
+        self.decode_lzw_encoded_frame_into_buffer(frame, &mut vec)?;
+        frame.buffer = Cow::Owned(vec);
+        frame.interlaced = false;
+        Ok(())
+    }
+
+    /// Converts into the given buffer. It must be [`Self::buffer_size`] bytes large.
+    ///
+    /// Pixels are always deinterlaced, so update `frame.interlaced` afterwards if you're putting the buffer back into the frame.
+    pub fn decode_lzw_encoded_frame_into_buffer(&mut self, frame: &Frame<'_>, buf: &mut [u8]) -> Result<(), DecodingError> {
+        let (&min_code_size, mut data) = frame.buffer.split_first().unwrap_or((&2, &[])); // empty buffer: code size 2, no data
+        self.lzw_reader.reset(min_code_size)?;
+        let lzw_reader = &mut self.lzw_reader;
+        self.pixel_converter.read_into_buffer(frame, buf, &mut move |out| {
+            loop { // repeat until bytes are written, nothing was consumed, or the input runs out
+                let (bytes_read, bytes_written) = lzw_reader.decode_bytes(data, out)?;
+                data = data.get(bytes_read..).unwrap_or_default();
+                if bytes_written > 0 || bytes_read == 0 || data.is_empty() {
+                    return Ok(bytes_written);
+                }
+            }
+        })?;
+        Ok(())
+    }
+
+    /// Number of bytes required for [`Self::decode_lzw_encoded_frame_into_buffer`]
+    #[inline]
+    #[must_use]
+    pub fn buffer_size(&self, frame: &Frame<'_>) -> usize {
+        self.pixel_converter.buffer_size(frame)
+    }
+}
+
 struct LzwReader {
     decoder: Option<LzwDecoder>,
     min_code_size: u8,
@@ -225,8 +291,9 @@ impl LzwReader {
     }
 
     pub fn reset(&mut self, min_code_size: u8) -> Result<(), DecodingError> {
-        // LZW spec: max 12 bits per code
-        if min_code_size > 11 {
+        // LZW spec: max 12 bits per code. This check helps catch confusion
+        // between LZW-compressed buffers and raw pixel data
+        if min_code_size > 11 || min_code_size < 1 {
             return Err(DecodingError::format("invalid minimal code size"));
         }
 
diff --git a/src/reader/mod.rs b/src/reader/mod.rs
index 5e06a17..a72e462 100644
--- a/src/reader/mod.rs
+++ b/src/reader/mod.rs
@@ -15,7 +15,7 @@ mod converter;
 
 pub use self::decoder::{
     PLTE_CHANNELS, StreamingDecoder, Decoded, DecodingError, DecodingFormatError,
-    Version, FrameDataType, OutputBuffer
+    Version, FrameDataType, OutputBuffer, FrameDecoder
 };
 
 use self::converter::PixelConverter;
@@ -230,9 +230,9 @@ impl<R: Read> ReadDecoder<R> {
 
     fn decode_next_bytes(&mut self, out: &mut OutputBuffer<'_>) -> Result<usize, DecodingError> {
         match self.decode_next(out)? {
-            Some(Decoded::BytesDecoded(len)) => return Ok(len.get()),
-            Some(Decoded::DataEnd) => return Ok(0),
-            _ => return Err(DecodingError::format("unexpected data")),
+            Some(Decoded::BytesDecoded(len)) => Ok(len.get()),
+            Some(Decoded::DataEnd) => Ok(0),
+            _ => Err(DecodingError::format("unexpected data")),
         }
     }
 }