From 1f7df36c86f900a33b502f53fa96ea5ff63b91a2 Mon Sep 17 00:00:00 2001 From: Kornel Date: Sat, 13 Jan 2024 12:59:28 +0000 Subject: [PATCH 1/4] Unused --- src/reader/decoder.rs | 2 -- src/reader/mod.rs | 6 ------ 2 files changed, 8 deletions(-) diff --git a/src/reader/decoder.rs b/src/reader/decoder.rs index fa544a0..417cd50 100644 --- a/src/reader/decoder.rs +++ b/src/reader/decoder.rs @@ -359,8 +359,6 @@ impl StreamingDecoder { mut buf: &[u8], write_into: &mut OutputBuffer<'_>, ) -> Result<(usize, Decoded), DecodingError> { - // NOTE: Do not change the function signature without double-checking the - // unsafe block! let len = buf.len(); while !buf.is_empty() { let (bytes, decoded) = self.next_state(buf, write_into)?; diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 1d0f7dd..e73b445 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -218,12 +218,6 @@ impl ReadDecoder { return Err(io::ErrorKind::UnexpectedEof.into()); } - // Dead code checks the lifetimes that the later mem::transmute can't. - #[cfg(test)] - if false { - return self.decoder.update(buf, write_into).map(|(_, res)| Some(res)); - } - self.decoder.update(buf, write_into)? }; self.reader.consume(consumed); From 1285484e8d10e5e73f1130acfe258d1febadc791 Mon Sep 17 00:00:00 2001 From: Kornel Date: Sat, 13 Jan 2024 13:00:18 +0000 Subject: [PATCH 2/4] Inlining control non-inline decode_next allows inlining all of its content --- src/reader/decoder.rs | 1 + src/reader/mod.rs | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/src/reader/decoder.rs b/src/reader/decoder.rs index 417cd50..aa0f551 100644 --- a/src/reader/decoder.rs +++ b/src/reader/decoder.rs @@ -414,6 +414,7 @@ impl StreamingDecoder { self.version } + #[inline] fn next_state(&mut self, buf: &[u8], write_into: &mut OutputBuffer<'_>) -> Result<(usize, Decoded), DecodingError> { macro_rules! goto ( ($n:expr, $state:expr) => ({ diff --git a/src/reader/mod.rs b/src/reader/mod.rs index e73b445..691fe49 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -118,6 +118,7 @@ impl Default for DecodeOptions { impl DecodeOptions { /// Creates a new decoder builder #[must_use] + #[inline] pub fn new() -> DecodeOptions { DecodeOptions { memory_limit: MemoryLimit::Bytes(50_000_000.try_into().unwrap()), // 50 MB @@ -130,6 +131,7 @@ impl DecodeOptions { } /// Configure how color data is decoded. + #[inline] pub fn set_color_output(&mut self, color: ColorOutput) { self.color_output = color; } @@ -210,6 +212,7 @@ struct ReadDecoder { } impl ReadDecoder { + #[inline(never)] fn decode_next(&mut self, write_into: &mut OutputBuffer<'_>) -> Result, DecodingError> { while !self.at_eof { let (consumed, result) = { @@ -253,12 +256,14 @@ pub struct Decoder { impl Decoder where R: Read { /// Create a new decoder with default options. + #[inline] pub fn new(reader: R) -> Result { DecodeOptions::new().read_info(reader) } /// Return a builder that allows configuring limits etc. #[must_use] + #[inline] pub fn build() -> DecodeOptions { DecodeOptions::new() } @@ -519,6 +524,7 @@ impl Decoder where R: Read { } /// Returns the color palette relevant for the frame that has been decoded + #[inline] pub fn palette(&self) -> Result<&[u8], DecodingError> { Ok(match self.current_frame.palette { Some(ref table) => table, @@ -534,11 +540,13 @@ impl Decoder where R: Read { } /// Width of the image + #[inline] pub fn width(&self) -> u16 { self.decoder.decoder.width() } /// Height of the image + #[inline] pub fn height(&self) -> u16 { self.decoder.decoder.height() } @@ -557,6 +565,7 @@ impl Decoder where R: Read { } /// Number of loop repetitions + #[inline] pub fn repeat(&self) -> Repeat { self.repeat } From c5deb27d5e391aa14408791002a506ad5caab090 Mon Sep 17 00:00:00 2001 From: Kornel Date: Thu, 11 Jan 2024 00:52:12 +0000 Subject: [PATCH 3/4] Move deinterlacing and RGBA conversions --- src/reader/converter.rs | 234 ++++++++++++++++++++++++++++++++++++++++ src/reader/mod.rs | 227 +++++--------------------------------- 2 files changed, 263 insertions(+), 198 deletions(-) create mode 100644 src/reader/converter.rs diff --git a/src/reader/converter.rs b/src/reader/converter.rs new file mode 100644 index 0000000..bf0ea13 --- /dev/null +++ b/src/reader/converter.rs @@ -0,0 +1,234 @@ +use std::borrow::Cow; +use std::io; +use std::mem; +use std::iter; +use crate::common::Frame; +use crate::MemoryLimit; + +use super::decoder::{ + PLTE_CHANNELS, DecodingError, OutputBuffer +}; + +pub(crate) const N_CHANNELS: usize = 4; + +/// Output mode for the image data +#[derive(Clone, Copy, Debug, PartialEq)] +#[repr(u8)] +pub enum ColorOutput { + /// The decoder expands the image data to 32bit RGBA. + /// This affects: + /// + /// - The buffer buffer of the `Frame` returned by [`Decoder::read_next_frame`]. + /// - `Decoder::fill_buffer`, `Decoder::buffer_size` and `Decoder::line_length`. + RGBA = 0, + /// The decoder returns the raw indexed data. + Indexed = 1, +} + +pub(crate) type FillBufferCallback<'a> = &'a mut dyn FnMut(&mut OutputBuffer<'_>) -> Result; + +/// Deinterlaces and expands to RGBA if needed +pub(crate) struct PixelConverter { + memory_limit: MemoryLimit, + color_output: ColorOutput, + buffer: Vec, + global_palette: Option>, +} + +impl PixelConverter { + pub(crate) fn new(color_output: ColorOutput, memory_limit: MemoryLimit) -> Self { + Self { + memory_limit, + color_output, + buffer: Vec::new(), + global_palette: None, + } + } + + pub(crate) fn read_frame(&mut self, frame: &mut Frame<'_>, data_callback: FillBufferCallback<'_>) -> Result<(), DecodingError> { + let pixel_bytes = self.memory_limit + .buffer_size(self.color_output, frame.width, frame.height) + .ok_or_else(|| io::Error::new(io::ErrorKind::OutOfMemory, "image is too large"))?; + + debug_assert_eq!( + pixel_bytes, self.buffer_size(frame), + "Checked computation diverges from required buffer size" + ); + + let mut vec = match mem::replace(&mut frame.buffer, Cow::Borrowed(&[])) { + // reuse buffer if possible without reallocating + Cow::Owned(mut vec) if vec.capacity() >= pixel_bytes => { + vec.resize(pixel_bytes, 0); + vec + }, + // resizing would realloc anyway, and 0-init is faster than a copy + _ => vec![0; pixel_bytes], + }; + self.read_into_buffer(frame, &mut vec, data_callback)?; + frame.buffer = Cow::Owned(vec); + frame.interlaced = false; + Ok(()) + } + + pub(crate) fn buffer_size(&self, frame: &Frame<'_>) -> usize { + self.line_length(frame) * frame.height as usize + } + + pub(crate) fn line_length(&self, frame: &Frame<'_>) -> usize { + use self::ColorOutput::*; + match self.color_output { + RGBA => frame.width as usize * N_CHANNELS, + Indexed => frame.width as usize, + } + } + + pub(crate) fn fill_buffer(&mut self, current_frame: &mut Frame<'_>, mut buf: &mut [u8], data_callback: FillBufferCallback<'_>) -> Result { + loop { + let decode_into = match self.color_output { + // When decoding indexed data, LZW can write the pixels directly + ColorOutput::Indexed => &mut buf[..], + // When decoding RGBA, the pixel data will be expanded by a factor of 4, + // and it's simpler to decode indexed pixels to another buffer first + ColorOutput::RGBA => { + let buffer_size = buf.len() / N_CHANNELS; + if buffer_size == 0 { + return Err(DecodingError::format("odd-sized buffer")); + } + if self.buffer.len() < buffer_size { + self.buffer.resize(buffer_size, 0); + } + &mut self.buffer[..buffer_size] + } + }; + match data_callback(&mut OutputBuffer::Slice(decode_into))? { + 0 => return Ok(false), + bytes_decoded => { + match self.color_output { + ColorOutput::RGBA => { + let transparent = current_frame.transparent; + let palette: &[u8] = current_frame.palette.as_deref() + .or(self.global_palette.as_deref()) + .unwrap_or_default(); // next_frame_info already checked it won't happen + + let (pixels, rest) = buf.split_at_mut(bytes_decoded * N_CHANNELS); + buf = rest; + + for (rgba, idx) in pixels.chunks_exact_mut(N_CHANNELS).zip(self.buffer.iter().copied().take(bytes_decoded)) { + let plte_offset = PLTE_CHANNELS * idx as usize; + if let Some(colors) = palette.get(plte_offset..plte_offset+PLTE_CHANNELS) { + rgba[0] = colors[0]; + rgba[1] = colors[1]; + rgba[2] = colors[2]; + rgba[3] = if let Some(t) = transparent { + if t == idx { 0x00 } else { 0xFF } + } else { + 0xFF + }; + } + } + }, + ColorOutput::Indexed => { + buf = &mut buf[bytes_decoded..]; + } + } + if buf.is_empty() { + return Ok(true); + } + }, + } + } + } + + pub(crate) fn global_palette(&self) -> Option<&[u8]> { + self.global_palette.as_deref() + } + + pub(crate) fn set_global_palette(&mut self, palette: Vec) { + self.global_palette = if !palette.is_empty() { + Some(palette) + } else { + None + }; + } + + pub(crate) fn read_into_buffer(&mut self, frame: &mut Frame<'_>, buf: &mut [u8], data_callback: FillBufferCallback<'_>) -> Result<(), DecodingError> { + if frame.interlaced { + let width = self.line_length(frame); + let height = frame.height as usize; + for row in (InterlaceIterator { len: height, next: 0, pass: 0 }) { + let start = row * width; + // Handle a too-small buffer without panicking + let line = buf.get_mut(start .. start + width).ok_or_else(|| DecodingError::format("buffer too small"))?; + if !self.fill_buffer(frame, line, data_callback)? { + return Err(DecodingError::format("image truncated")); + } + } + } else { + let buf = buf.get_mut(..self.buffer_size(frame)).ok_or_else(|| DecodingError::format("buffer too small"))?; + if !self.fill_buffer(frame, buf, data_callback)? { + return Err(DecodingError::format("image truncated")); + } + }; + Ok(()) + } +} + +struct InterlaceIterator { + len: usize, + next: usize, + pass: usize, +} + +impl iter::Iterator for InterlaceIterator { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + if self.len == 0 { + return None; + } + // although the pass never goes out of bounds thanks to len==0, + // the optimizer doesn't see it. get()? avoids costlier panicking code. + let mut next = self.next + *[8, 8, 4, 2].get(self.pass)?; + while next >= self.len { + debug_assert!(self.pass < 4); + next = *[4, 2, 1, 0].get(self.pass)?; + self.pass += 1; + } + mem::swap(&mut next, &mut self.next); + Some(next) + } +} + +#[cfg(test)] +mod test { + use super::InterlaceIterator; + + #[test] + fn test_interlace_iterator() { + for &(len, expect) in &[ + (0, &[][..]), + (1, &[0][..]), + (2, &[0, 1][..]), + (3, &[0, 2, 1][..]), + (4, &[0, 2, 1, 3][..]), + (5, &[0, 4, 2, 1, 3][..]), + (6, &[0, 4, 2, 1, 3, 5][..]), + (7, &[0, 4, 2, 6, 1, 3, 5][..]), + (8, &[0, 4, 2, 6, 1, 3, 5, 7][..]), + (9, &[0, 8, 4, 2, 6, 1, 3, 5, 7][..]), + (10, &[0, 8, 4, 2, 6, 1, 3, 5, 7, 9][..]), + (11, &[0, 8, 4, 2, 6, 10, 1, 3, 5, 7, 9][..]), + (12, &[0, 8, 4, 2, 6, 10, 1, 3, 5, 7, 9, 11][..]), + (13, &[0, 8, 4, 12, 2, 6, 10, 1, 3, 5, 7, 9, 11][..]), + (14, &[0, 8, 4, 12, 2, 6, 10, 1, 3, 5, 7, 9, 11, 13][..]), + (15, &[0, 8, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13][..]), + (16, &[0, 8, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13, 15][..]), + (17, &[0, 8, 16, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13, 15][..]), + ] { + let iter = InterlaceIterator { len, next: 0, pass: 0 }; + let lines = iter.collect::>(); + assert_eq!(lines, expect); + } + } +} diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 691fe49..5e06a17 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -2,7 +2,7 @@ use std::borrow::Cow; use std::io; use std::iter::FusedIterator; use std::mem; -use std::iter; + use std::io::prelude::*; use std::num::NonZeroU64; use std::convert::{TryFrom, TryInto}; @@ -11,26 +11,15 @@ use crate::Repeat; use crate::common::{Block, Frame}; mod decoder; +mod converter; + pub use self::decoder::{ PLTE_CHANNELS, StreamingDecoder, Decoded, DecodingError, DecodingFormatError, Version, FrameDataType, OutputBuffer }; -const N_CHANNELS: usize = 4; - -/// Output mode for the image data -#[derive(Clone, Copy, Debug, PartialEq)] -#[repr(u8)] -pub enum ColorOutput { - /// The decoder expands the image data to 32bit RGBA. - /// This affects: - /// - /// - The buffer buffer of the `Frame` returned by [`Decoder::read_next_frame`]. - /// - `Decoder::fill_buffer`, `Decoder::buffer_size` and `Decoder::line_length`. - RGBA = 0, - /// The decoder returns the raw indexed data. - Indexed = 1, -} +use self::converter::PixelConverter; +pub use self::converter::ColorOutput; #[derive(Clone, Debug)] /// The maximum amount of memory the decoder is allowed to use for each frame @@ -238,20 +227,25 @@ impl ReadDecoder { fn into_inner(self) -> io::BufReader { self.reader } + + fn decode_next_bytes(&mut self, out: &mut OutputBuffer<'_>) -> Result { + match self.decode_next(out)? { + Some(Decoded::BytesDecoded(len)) => return Ok(len.get()), + Some(Decoded::DataEnd) => return Ok(0), + _ => return Err(DecodingError::format("unexpected data")), + } + } } #[allow(dead_code)] /// GIF decoder. Create [`DecodeOptions`] to get started, and call [`DecodeOptions::read_info`]. pub struct Decoder { decoder: ReadDecoder, - color_output: ColorOutput, - memory_limit: MemoryLimit, + pixel_converter: PixelConverter, bg_color: Option, repeat: Repeat, - global_palette: Option>, current_frame: Frame<'static>, current_frame_data_type: FrameDataType, - buffer: Vec, } impl Decoder where R: Read { @@ -276,11 +270,8 @@ impl Decoder where R: Read { at_eof: false, }, bg_color: None, - global_palette: None, - buffer: vec![], + pixel_converter: PixelConverter::new(options.color_output, options.memory_limit), repeat: Repeat::default(), - color_output: options.color_output, - memory_limit: options.memory_limit, current_frame: Frame::default(), current_frame_data_type: FrameDataType::Pixels, } @@ -293,11 +284,7 @@ impl Decoder where R: Read { self.bg_color = Some(bg_color); } Some(Decoded::GlobalPalette(palette)) => { - self.global_palette = if !palette.is_empty() { - Some(palette.into()) - } else { - None - }; + self.pixel_converter.set_global_palette(palette.into()); }, Some(Decoded::Repetitions(repeat)) => { self.repeat = repeat; @@ -315,7 +302,7 @@ impl Decoder where R: Read { } } // If the background color is invalid, ignore it - if let Some(ref palette) = self.global_palette { + if let Some(palette) = self.pixel_converter.global_palette() { if self.bg_color.unwrap_or(0) as usize >= (palette.len() / PLTE_CHANNELS) { self.bg_color = None; } @@ -330,7 +317,7 @@ impl Decoder where R: Read { Some(Decoded::FrameMetadata(frame_data_type)) => { self.current_frame = self.decoder.decoder.current_frame_mut().take(); self.current_frame_data_type = frame_data_type; - if self.current_frame.palette.is_none() && self.global_palette.is_none() { + if self.current_frame.palette.is_none() && self.global_palette().is_none() { return Err(DecodingError::format( "no color table available for current frame", )); @@ -351,31 +338,11 @@ impl Decoder where R: Read { /// /// You can also call `.into_iter()` on the decoder to use it as a regular iterator. pub fn read_next_frame(&mut self) -> Result>, DecodingError> { - if let Some(frame) = self.next_frame_info()? { - let (width, height) = (frame.width, frame.height); - let pixel_bytes = self.memory_limit - .buffer_size(self.color_output, width, height) - .ok_or_else(|| io::Error::new(io::ErrorKind::OutOfMemory, "image is too large"))?; - - debug_assert_eq!( - pixel_bytes, self.buffer_size(), - "Checked computation diverges from required buffer size" - ); + if let Some(_) = self.next_frame_info()? { match self.current_frame_data_type { FrameDataType::Pixels => { - let mut vec = match mem::replace(&mut self.current_frame.buffer, Cow::Borrowed(&[])) { - // reuse buffer if possible without reallocating - Cow::Owned(mut vec) if vec.capacity() >= pixel_bytes => { - vec.resize(pixel_bytes, 0); - vec - }, - // resizing would realloc anyway, and 0-init is faster than a copy - _ => vec![0; pixel_bytes], - }; - self.read_into_buffer(&mut vec)?; - self.current_frame.buffer = Cow::Owned(vec); - self.current_frame.interlaced = false; - } + self.pixel_converter.read_frame(&mut self.current_frame, &mut |out| self.decoder.decode_next_bytes(out))?; + }, FrameDataType::Lzw { min_code_size } => { let mut vec = if matches!(self.current_frame.buffer, Cow::Owned(_)) { let mut vec = mem::replace(&mut self.current_frame.buffer, Cow::Borrowed(&[])).into_owned(); @@ -385,7 +352,7 @@ impl Decoder where R: Read { Vec::new() }; // Guesstimate 2bpp - vec.try_reserve(usize::from(width) * usize::from(height) / 4) + vec.try_reserve(usize::from(self.current_frame.width) * usize::from(self.current_frame.height) / 4) .map_err(|_| io::Error::from(io::ErrorKind::OutOfMemory))?; self.copy_lzw_into_buffer(min_code_size, &mut vec)?; self.current_frame.buffer = Cow::Owned(vec); @@ -411,24 +378,7 @@ impl Decoder where R: Read { /// The length of `buf` must be at least `Self::buffer_size`. /// Deinterlaces the result. pub fn read_into_buffer(&mut self, buf: &mut [u8]) -> Result<(), DecodingError> { - if self.current_frame.interlaced { - let width = self.line_length(); - let height = self.current_frame.height as usize; - for row in (InterlaceIterator { len: height, next: 0, pass: 0 }) { - let start = row * width; - // Handle a too-small buffer without panicking - let line = buf.get_mut(start .. start + width).ok_or_else(|| DecodingError::format("buffer too small"))?; - if !self.fill_buffer(line)? { - return Err(DecodingError::format("image truncated")); - } - } - } else { - let buf = buf.get_mut(..self.buffer_size()).ok_or_else(|| DecodingError::format("buffer too small"))?; - if !self.fill_buffer(buf)? { - return Err(DecodingError::format("image truncated")); - } - }; - Ok(()) + self.pixel_converter.read_into_buffer(&mut self.current_frame, buf, &mut |out| self.decoder.decode_next_bytes(out)) } fn copy_lzw_into_buffer(&mut self, min_code_size: u8, buf: &mut Vec) -> Result<(), DecodingError> { @@ -450,77 +400,18 @@ impl Decoder where R: Read { /// /// `Self::next_frame_info` needs to be called beforehand. Returns `true` if the supplied /// buffer could be filled completely. Should not be called after `false` had been returned. - pub fn fill_buffer(&mut self, mut buf: &mut [u8]) -> Result { - loop { - let decode_into = match self.color_output { - // When decoding indexed data, LZW can write the pixels directly - ColorOutput::Indexed => &mut buf[..], - // When decoding RGBA, the pixel data will be expanded by a factor of 4, - // and it's simpler to decode indexed pixels to another buffer first - ColorOutput::RGBA => { - let buffer_size = buf.len() / N_CHANNELS; - if buffer_size == 0 { - return Err(DecodingError::format("odd-sized buffer")); - } - if self.buffer.len() < buffer_size { - self.buffer.resize(buffer_size, 0); - } - &mut self.buffer[..buffer_size] - } - }; - match self.decoder.decode_next(&mut OutputBuffer::Slice(decode_into))? { - Some(Decoded::BytesDecoded(bytes_decoded)) => { - let bytes_decoded = bytes_decoded.get(); - match self.color_output { - ColorOutput::RGBA => { - let transparent = self.current_frame.transparent; - let palette: &[u8] = self.current_frame.palette.as_deref() - .or(self.global_palette.as_deref()) - .unwrap_or_default(); // next_frame_info already checked it won't happen - - let (pixels, rest) = buf.split_at_mut(bytes_decoded * N_CHANNELS); - buf = rest; - - for (rgba, idx) in pixels.chunks_exact_mut(N_CHANNELS).zip(self.buffer.iter().copied().take(bytes_decoded)) { - let plte_offset = PLTE_CHANNELS * idx as usize; - if let Some(colors) = palette.get(plte_offset..plte_offset+PLTE_CHANNELS) { - rgba[0] = colors[0]; - rgba[1] = colors[1]; - rgba[2] = colors[2]; - rgba[3] = if let Some(t) = transparent { - if t == idx { 0x00 } else { 0xFF } - } else { - 0xFF - }; - } - } - }, - ColorOutput::Indexed => { - buf = &mut buf[bytes_decoded..]; - } - } - if buf.is_empty() { - return Ok(true); - } - } - Some(_) => return Ok(false), // make sure that no important result is missed - None => return Ok(false), - } - } + pub fn fill_buffer(&mut self, buf: &mut [u8]) -> Result { + self.pixel_converter.fill_buffer(&mut self.current_frame, buf, &mut |out| self.decoder.decode_next_bytes(out)) } /// Output buffer size pub fn buffer_size(&self) -> usize { - self.line_length() * self.current_frame.height as usize + self.pixel_converter.buffer_size(&self.current_frame) } /// Line length of the current frame pub fn line_length(&self) -> usize { - use self::ColorOutput::*; - match self.color_output { - RGBA => self.current_frame.width as usize * N_CHANNELS, - Indexed => self.current_frame.width as usize, - } + self.pixel_converter.line_length(&self.current_frame) } /// Returns the color palette relevant for the frame that has been decoded @@ -528,7 +419,7 @@ impl Decoder where R: Read { pub fn palette(&self) -> Result<&[u8], DecodingError> { Ok(match self.current_frame.palette { Some(ref table) => table, - None => self.global_palette.as_ref().ok_or(DecodingError::format( + None => self.global_palette().ok_or(DecodingError::format( "no color table available for current frame", ))?, }) @@ -536,7 +427,7 @@ impl Decoder where R: Read { /// The global color palette pub fn global_palette(&self) -> Option<&[u8]> { - self.global_palette.as_deref() + self.pixel_converter.global_palette() } /// Width of the image @@ -610,63 +501,3 @@ impl Iterator for DecoderIter { } } } - -struct InterlaceIterator { - len: usize, - next: usize, - pass: usize, -} - -impl iter::Iterator for InterlaceIterator { - type Item = usize; - - #[inline] - fn next(&mut self) -> Option { - if self.len == 0 { - return None; - } - // although the pass never goes out of bounds thanks to len==0, - // the optimizer doesn't see it. get()? avoids costlier panicking code. - let mut next = self.next + *[8, 8, 4, 2].get(self.pass)?; - while next >= self.len { - debug_assert!(self.pass < 4); - next = *[4, 2, 1, 0].get(self.pass)?; - self.pass += 1; - } - mem::swap(&mut next, &mut self.next); - Some(next) - } -} - -#[cfg(test)] -mod test { - use super::InterlaceIterator; - - #[test] - fn test_interlace_iterator() { - for &(len, expect) in &[ - (0, &[][..]), - (1, &[0][..]), - (2, &[0, 1][..]), - (3, &[0, 2, 1][..]), - (4, &[0, 2, 1, 3][..]), - (5, &[0, 4, 2, 1, 3][..]), - (6, &[0, 4, 2, 1, 3, 5][..]), - (7, &[0, 4, 2, 6, 1, 3, 5][..]), - (8, &[0, 4, 2, 6, 1, 3, 5, 7][..]), - (9, &[0, 8, 4, 2, 6, 1, 3, 5, 7][..]), - (10, &[0, 8, 4, 2, 6, 1, 3, 5, 7, 9][..]), - (11, &[0, 8, 4, 2, 6, 10, 1, 3, 5, 7, 9][..]), - (12, &[0, 8, 4, 2, 6, 10, 1, 3, 5, 7, 9, 11][..]), - (13, &[0, 8, 4, 12, 2, 6, 10, 1, 3, 5, 7, 9, 11][..]), - (14, &[0, 8, 4, 12, 2, 6, 10, 1, 3, 5, 7, 9, 11, 13][..]), - (15, &[0, 8, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13][..]), - (16, &[0, 8, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13, 15][..]), - (17, &[0, 8, 16, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13, 15][..]), - ] { - let iter = InterlaceIterator { len, next: 0, pass: 0 }; - let lines = iter.collect::>(); - assert_eq!(lines, expect); - } - } -} From 8bd439cfd8bc2df68ed4757e9ba9d291f5a7561e Mon Sep 17 00:00:00 2001 From: Kornel Date: Thu, 11 Jan 2024 01:12:52 +0000 Subject: [PATCH 4/4] API to undo make_lzw_pre_encoded --- Cargo.toml | 1 + Changes.md | 2 ++ examples/parallel.rs | 75 +++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 2 +- src/reader/converter.rs | 16 +++++++-- src/reader/decoder.rs | 73 +++++++++++++++++++++++++++++++++++++-- src/reader/mod.rs | 8 ++--- 7 files changed, 166 insertions(+), 11 deletions(-) create mode 100644 examples/parallel.rs diff --git a/Cargo.toml b/Cargo.toml index 71b1ac4..030d393 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ color_quant = { version = "1.1", optional = true } glob = "0.3" criterion = "0.5.1" png = "0.17.10" +rayon = "1.8.0" # for parallel reencoding example [features] default = ["raii_no_panic", "std", "color_quant"] diff --git a/Changes.md b/Changes.md index a62c530..ff67e81 100644 --- a/Changes.md +++ b/Changes.md @@ -7,6 +7,8 @@ Features: It works together with `write_lzw_pre_encoded_frame` for quick rewriting of GIF files. - Added support pre-allocated `Vec`s in `from_palette_pixels` - Added ability to recover the `io::Read`er after decoding. + - Added support for decompressing `Frame.buffer` with LZW data, + which enables fully parallel GIF re-encoding (see examples/parallel.rs), Optimization: - Less buffering, copying, and lower peak memory usage. diff --git a/examples/parallel.rs b/examples/parallel.rs new file mode 100644 index 0000000..893368c --- /dev/null +++ b/examples/parallel.rs @@ -0,0 +1,75 @@ +//! Reencodes GIF in parallel + +use gif::streaming_decoder::FrameDecoder; +use gif::DecodeOptions; +use rayon::iter::ParallelBridge; +use rayon::iter::ParallelIterator; +use std::env; +use std::fs::File; +use std::io::BufWriter; +use std::path::PathBuf; + +fn main() -> Result<(), Box> { + let input_path = PathBuf::from( + env::args_os() + .nth(1) + .ok_or("Specify a GIF path as the first argument")?, + ); + + let input = std::fs::read(&input_path)?; + let input_size = input.len(); + + let start = std::time::Instant::now(); + + let mut options = DecodeOptions::new(); + options.skip_frame_decoding(true); // This gives LZW frames + + let decoder = options.read_info(std::io::Cursor::new(input))?; + let repeat = decoder.repeat(); + let screen_width = decoder.width(); + let screen_height = decoder.height(); + let global_pal = decoder.global_palette().unwrap_or_default().to_vec(); + + let output_file = format!( + "{}-reencoded.gif", + input_path.file_stem().unwrap().to_str().unwrap() + ); + let output = BufWriter::new(File::create(output_file)?); + let mut encoder = gif::Encoder::new(output, screen_width, screen_height, &global_pal)?; + encoder.set_repeat(repeat)?; + + let (send, recv) = std::sync::mpsc::channel(); + + decoder.into_iter().enumerate().par_bridge().try_for_each(move |(frame_number, frame)| { + let mut frame = frame?; + FrameDecoder::new(DecodeOptions::new()) + .decode_lzw_encoded_frame(&mut frame) + .unwrap(); + // frame is now pixels + frame.make_lzw_pre_encoded(); + // frame is now LZW again, re-encoded + send.send((frame_number, frame)).unwrap(); + Ok::<_, gif::DecodingError>(()) + })?; + + // Decoding and encoding can happen in parallel, but writing to the GIF file is sequential + let mut next_frame_number = 0; + let mut frames_to_process = Vec::new(); + for (frame_number, frame) in recv { + // frames can arrive in any order, since they're processed in parallel, + // so they have to be stored in a queue + frames_to_process.push((frame_number, frame)); + while let Some(index) = frames_to_process.iter().position(|&(num, _)| num == next_frame_number) { + let frame = frames_to_process.remove(index).1; + encoder.write_lzw_pre_encoded_frame(&frame)?; + next_frame_number += 1; + } + } + encoder.into_inner()?; + + let seconds = start.elapsed().as_millis() as f64 / 1000.; + let rate = (input_size / 1024 / 1024) as f64 / seconds; + + eprintln!("Finished in {seconds:0.2}s, {rate:0.0}MiB/s {}", if cfg!(debug_assertions) { ". Run with --release for more speed." } else { "" }); + Ok(()) +} diff --git a/src/lib.rs b/src/lib.rs index 1a845e7..b883669 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -128,7 +128,7 @@ pub use crate::encoder::{Encoder, ExtensionData, Repeat, EncodingError}; /// Low-level, advanced decoder. Prefer [`Decoder`] instead, which can stream frames too. pub mod streaming_decoder { pub use crate::common::Block; - pub use crate::reader::{StreamingDecoder, OutputBuffer, Decoded, FrameDataType}; + pub use crate::reader::{Decoded, FrameDataType, FrameDecoder, OutputBuffer, StreamingDecoder}; } macro_rules! insert_as_doc { diff --git a/src/reader/converter.rs b/src/reader/converter.rs index bf0ea13..f6183f1 100644 --- a/src/reader/converter.rs +++ b/src/reader/converter.rs @@ -45,7 +45,7 @@ impl PixelConverter { } } - pub(crate) fn read_frame(&mut self, frame: &mut Frame<'_>, data_callback: FillBufferCallback<'_>) -> Result<(), DecodingError> { + pub(crate) fn check_buffer_size(&mut self, frame: &Frame<'_>) -> Result { let pixel_bytes = self.memory_limit .buffer_size(self.color_output, frame.width, frame.height) .ok_or_else(|| io::Error::new(io::ErrorKind::OutOfMemory, "image is too large"))?; @@ -54,7 +54,12 @@ impl PixelConverter { pixel_bytes, self.buffer_size(frame), "Checked computation diverges from required buffer size" ); + Ok(pixel_bytes) + } + #[inline] + pub(crate) fn read_frame(&mut self, frame: &mut Frame<'_>, data_callback: FillBufferCallback<'_>) -> Result<(), DecodingError> { + let pixel_bytes = self.check_buffer_size(frame)?; let mut vec = match mem::replace(&mut frame.buffer, Cow::Borrowed(&[])) { // reuse buffer if possible without reallocating Cow::Owned(mut vec) if vec.capacity() >= pixel_bytes => { @@ -82,7 +87,9 @@ impl PixelConverter { } } - pub(crate) fn fill_buffer(&mut self, current_frame: &mut Frame<'_>, mut buf: &mut [u8], data_callback: FillBufferCallback<'_>) -> Result { + /// Use `read_into_buffer` to deinterlace + #[inline(never)] + pub(crate) fn fill_buffer(&mut self, current_frame: &Frame<'_>, mut buf: &mut [u8], data_callback: FillBufferCallback<'_>) -> Result { loop { let decode_into = match self.color_output { // When decoding indexed data, LZW can write the pixels directly @@ -151,7 +158,10 @@ impl PixelConverter { }; } - pub(crate) fn read_into_buffer(&mut self, frame: &mut Frame<'_>, buf: &mut [u8], data_callback: FillBufferCallback<'_>) -> Result<(), DecodingError> { + /// Applies deinterlacing + /// + /// Set `frame.interlaced = false` afterwards if you're putting the buffer back into the `Frame` + pub(crate) fn read_into_buffer(&mut self, frame: &Frame<'_>, buf: &mut [u8], data_callback: FillBufferCallback<'_>) -> Result<(), DecodingError> { if frame.interlaced { let width = self.line_length(frame); let height = frame.height as usize; diff --git a/src/reader/decoder.rs b/src/reader/decoder.rs index aa0f551..1020dbd 100644 --- a/src/reader/decoder.rs +++ b/src/reader/decoder.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::cmp; use std::error; use std::fmt; @@ -95,7 +96,7 @@ impl From for DecodingError { } } -/// Varies depending on skip_frame_decoding +/// Varies depending on `skip_frame_decoding` #[derive(Debug, Copy, Clone)] pub enum FrameDataType { /// `Frame.buffer` will be regular pixel data @@ -178,6 +179,8 @@ enum State { } use self::State::*; +use super::converter::PixelConverter; + /// U16 values that may occur in a GIF image #[derive(Debug, Copy, Clone)] enum U16Value { @@ -209,6 +212,69 @@ enum ByteValue { CodeSize, } +/// Decoder for `Frame::make_lzw_pre_encoded` +pub struct FrameDecoder { + lzw_reader: LzwReader, + pixel_converter: PixelConverter, +} + +impl FrameDecoder { + /// See also `set_global_palette` + #[inline] + #[must_use] + pub fn new(options: DecodeOptions) -> Self { + Self { + lzw_reader: LzwReader::new(options.check_for_end_code), + pixel_converter: PixelConverter::new(options.color_output, options.memory_limit), + } + } + + /// Palette used for RGBA conversion + #[inline] + pub fn set_global_palette(&mut self, palette: Vec) { + self.pixel_converter.set_global_palette(palette); + } + + /// Converts the frame in-place, replacing its LZW buffer with pixels. + /// + /// If you get an error about invalid min code size, the buffer was probably pixels, not compressed data. + #[inline] + pub fn decode_lzw_encoded_frame(&mut self, frame: &mut Frame<'_>) -> Result<(), DecodingError> { + let pixel_bytes = self.pixel_converter.check_buffer_size(frame)?; + let mut vec = vec![0; pixel_bytes]; + self.decode_lzw_encoded_frame_into_buffer(frame, &mut vec)?; + frame.buffer = Cow::Owned(vec); + frame.interlaced = false; + Ok(()) + } + + /// Converts into the given buffer. It must be [`buffer_size()`] bytes large. + /// + /// Pixels are always deinterlaced, so update `frame.interlaced` afterwards if you're putting the buffer back into the frame. + pub fn decode_lzw_encoded_frame_into_buffer(&mut self, frame: &Frame<'_>, buf: &mut [u8]) -> Result<(), DecodingError> { + let (&min_code_size, mut data) = frame.buffer.split_first().unwrap_or((&2, &[])); + self.lzw_reader.reset(min_code_size)?; + let lzw_reader = &mut self.lzw_reader; + self.pixel_converter.read_into_buffer(frame, buf, &mut move |out| { + loop { + let (bytes_read, bytes_written) = lzw_reader.decode_bytes(data, out)?; + data = &data.get(bytes_read..).unwrap_or_default(); + if bytes_written > 0 || bytes_read == 0 || data.is_empty() { + return Ok(bytes_written) + } + } + })?; + Ok(()) + } + + /// Number of bytes required for `decode_lzw_encoded_frame_into_buffer` + #[inline] + #[must_use] + pub fn buffer_size(&self, frame: &Frame<'_>) -> usize { + self.pixel_converter.buffer_size(frame) + } +} + struct LzwReader { decoder: Option, min_code_size: u8, @@ -225,8 +291,9 @@ impl LzwReader { } pub fn reset(&mut self, min_code_size: u8) -> Result<(), DecodingError> { - // LZW spec: max 12 bits per code - if min_code_size > 11 { + // LZW spec: max 12 bits per code. This check helps catch confusion + // between LZW-compressed buffers and raw pixel data + if min_code_size > 11 || min_code_size < 1 { return Err(DecodingError::format("invalid minimal code size")); } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 5e06a17..a72e462 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -15,7 +15,7 @@ mod converter; pub use self::decoder::{ PLTE_CHANNELS, StreamingDecoder, Decoded, DecodingError, DecodingFormatError, - Version, FrameDataType, OutputBuffer + Version, FrameDataType, OutputBuffer, FrameDecoder }; use self::converter::PixelConverter; @@ -230,9 +230,9 @@ impl ReadDecoder { fn decode_next_bytes(&mut self, out: &mut OutputBuffer<'_>) -> Result { match self.decode_next(out)? { - Some(Decoded::BytesDecoded(len)) => return Ok(len.get()), - Some(Decoded::DataEnd) => return Ok(0), - _ => return Err(DecodingError::format("unexpected data")), + Some(Decoded::BytesDecoded(len)) => Ok(len.get()), + Some(Decoded::DataEnd) => Ok(0), + _ => Err(DecodingError::format("unexpected data")), } } }