Skip to content

Commit

Permalink
Merge pull request #177 from kornelski/parallel-reencoder
Browse files Browse the repository at this point in the history
Separate LZW decoder for fully parallel GIF re-encoding
  • Loading branch information
kornelski authored Jan 13, 2024
2 parents 422edba + 8bd439c commit db12002
Show file tree
Hide file tree
Showing 7 changed files with 433 additions and 211 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ color_quant = { version = "1.1", optional = true }
glob = "0.3"
criterion = "0.5.1"
png = "0.17.10"
rayon = "1.8.0" # for parallel reencoding example

[features]
default = ["raii_no_panic", "std", "color_quant"]
Expand Down
2 changes: 2 additions & 0 deletions Changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ Features:
It works together with `write_lzw_pre_encoded_frame` for quick rewriting of GIF files.
- Added support pre-allocated `Vec`s in `from_palette_pixels`
- Added ability to recover the `io::Read`er after decoding.
- Added support for decompressing `Frame.buffer` with LZW data,
which enables fully parallel GIF re-encoding (see examples/parallel.rs),

Optimization:
- Less buffering, copying, and lower peak memory usage.
Expand Down
75 changes: 75 additions & 0 deletions examples/parallel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
//! Reencodes GIF in parallel
use gif::streaming_decoder::FrameDecoder;
use gif::DecodeOptions;
use rayon::iter::ParallelBridge;
use rayon::iter::ParallelIterator;
use std::env;
use std::fs::File;
use std::io::BufWriter;
use std::path::PathBuf;

fn main() -> Result<(), Box<dyn std::error::Error>> {
let input_path = PathBuf::from(
env::args_os()
.nth(1)
.ok_or("Specify a GIF path as the first argument")?,
);

let input = std::fs::read(&input_path)?;
let input_size = input.len();

let start = std::time::Instant::now();

let mut options = DecodeOptions::new();
options.skip_frame_decoding(true); // This gives LZW frames

let decoder = options.read_info(std::io::Cursor::new(input))?;
let repeat = decoder.repeat();
let screen_width = decoder.width();
let screen_height = decoder.height();
let global_pal = decoder.global_palette().unwrap_or_default().to_vec();

let output_file = format!(
"{}-reencoded.gif",
input_path.file_stem().unwrap().to_str().unwrap()
);
let output = BufWriter::new(File::create(output_file)?);
let mut encoder = gif::Encoder::new(output, screen_width, screen_height, &global_pal)?;
encoder.set_repeat(repeat)?;

let (send, recv) = std::sync::mpsc::channel();

decoder.into_iter().enumerate().par_bridge().try_for_each(move |(frame_number, frame)| {
let mut frame = frame?;
FrameDecoder::new(DecodeOptions::new())
.decode_lzw_encoded_frame(&mut frame)
.unwrap();
// frame is now pixels
frame.make_lzw_pre_encoded();
// frame is now LZW again, re-encoded
send.send((frame_number, frame)).unwrap();
Ok::<_, gif::DecodingError>(())
})?;

// Decoding and encoding can happen in parallel, but writing to the GIF file is sequential
let mut next_frame_number = 0;
let mut frames_to_process = Vec::new();
for (frame_number, frame) in recv {
// frames can arrive in any order, since they're processed in parallel,
// so they have to be stored in a queue
frames_to_process.push((frame_number, frame));
while let Some(index) = frames_to_process.iter().position(|&(num, _)| num == next_frame_number) {
let frame = frames_to_process.remove(index).1;
encoder.write_lzw_pre_encoded_frame(&frame)?;
next_frame_number += 1;
}
}
encoder.into_inner()?;

let seconds = start.elapsed().as_millis() as f64 / 1000.;
let rate = (input_size / 1024 / 1024) as f64 / seconds;

eprintln!("Finished in {seconds:0.2}s, {rate:0.0}MiB/s {}", if cfg!(debug_assertions) { ". Run with --release for more speed." } else { "" });
Ok(())
}
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ pub use crate::encoder::{Encoder, ExtensionData, Repeat, EncodingError};
/// Low-level, advanced decoder. Prefer [`Decoder`] instead, which can stream frames too.
pub mod streaming_decoder {
pub use crate::common::Block;
pub use crate::reader::{StreamingDecoder, OutputBuffer, Decoded, FrameDataType};
pub use crate::reader::{Decoded, FrameDataType, FrameDecoder, OutputBuffer, StreamingDecoder};
}

macro_rules! insert_as_doc {
Expand Down
244 changes: 244 additions & 0 deletions src/reader/converter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
use std::borrow::Cow;
use std::io;
use std::mem;
use std::iter;
use crate::common::Frame;
use crate::MemoryLimit;

use super::decoder::{
PLTE_CHANNELS, DecodingError, OutputBuffer
};

pub(crate) const N_CHANNELS: usize = 4;

/// Output mode for the image data
#[derive(Clone, Copy, Debug, PartialEq)]
#[repr(u8)]
pub enum ColorOutput {
/// The decoder expands the image data to 32bit RGBA.
/// This affects:
///
/// - The buffer buffer of the `Frame` returned by [`Decoder::read_next_frame`].
/// - `Decoder::fill_buffer`, `Decoder::buffer_size` and `Decoder::line_length`.
RGBA = 0,
/// The decoder returns the raw indexed data.
Indexed = 1,
}

pub(crate) type FillBufferCallback<'a> = &'a mut dyn FnMut(&mut OutputBuffer<'_>) -> Result<usize, DecodingError>;

/// Deinterlaces and expands to RGBA if needed
pub(crate) struct PixelConverter {
memory_limit: MemoryLimit,
color_output: ColorOutput,
buffer: Vec<u8>,
global_palette: Option<Vec<u8>>,
}

impl PixelConverter {
pub(crate) fn new(color_output: ColorOutput, memory_limit: MemoryLimit) -> Self {
Self {
memory_limit,
color_output,
buffer: Vec::new(),
global_palette: None,
}
}

pub(crate) fn check_buffer_size(&mut self, frame: &Frame<'_>) -> Result<usize, DecodingError> {
let pixel_bytes = self.memory_limit
.buffer_size(self.color_output, frame.width, frame.height)
.ok_or_else(|| io::Error::new(io::ErrorKind::OutOfMemory, "image is too large"))?;

debug_assert_eq!(
pixel_bytes, self.buffer_size(frame),
"Checked computation diverges from required buffer size"
);
Ok(pixel_bytes)
}

#[inline]
pub(crate) fn read_frame(&mut self, frame: &mut Frame<'_>, data_callback: FillBufferCallback<'_>) -> Result<(), DecodingError> {
let pixel_bytes = self.check_buffer_size(frame)?;
let mut vec = match mem::replace(&mut frame.buffer, Cow::Borrowed(&[])) {
// reuse buffer if possible without reallocating
Cow::Owned(mut vec) if vec.capacity() >= pixel_bytes => {
vec.resize(pixel_bytes, 0);
vec
},
// resizing would realloc anyway, and 0-init is faster than a copy
_ => vec![0; pixel_bytes],
};
self.read_into_buffer(frame, &mut vec, data_callback)?;
frame.buffer = Cow::Owned(vec);
frame.interlaced = false;
Ok(())
}

pub(crate) fn buffer_size(&self, frame: &Frame<'_>) -> usize {
self.line_length(frame) * frame.height as usize
}

pub(crate) fn line_length(&self, frame: &Frame<'_>) -> usize {
use self::ColorOutput::*;
match self.color_output {
RGBA => frame.width as usize * N_CHANNELS,
Indexed => frame.width as usize,
}
}

/// Use `read_into_buffer` to deinterlace
#[inline(never)]
pub(crate) fn fill_buffer(&mut self, current_frame: &Frame<'_>, mut buf: &mut [u8], data_callback: FillBufferCallback<'_>) -> Result<bool, DecodingError> {
loop {
let decode_into = match self.color_output {
// When decoding indexed data, LZW can write the pixels directly
ColorOutput::Indexed => &mut buf[..],
// When decoding RGBA, the pixel data will be expanded by a factor of 4,
// and it's simpler to decode indexed pixels to another buffer first
ColorOutput::RGBA => {
let buffer_size = buf.len() / N_CHANNELS;
if buffer_size == 0 {
return Err(DecodingError::format("odd-sized buffer"));
}
if self.buffer.len() < buffer_size {
self.buffer.resize(buffer_size, 0);
}
&mut self.buffer[..buffer_size]
}
};
match data_callback(&mut OutputBuffer::Slice(decode_into))? {
0 => return Ok(false),
bytes_decoded => {
match self.color_output {
ColorOutput::RGBA => {
let transparent = current_frame.transparent;
let palette: &[u8] = current_frame.palette.as_deref()
.or(self.global_palette.as_deref())
.unwrap_or_default(); // next_frame_info already checked it won't happen

let (pixels, rest) = buf.split_at_mut(bytes_decoded * N_CHANNELS);
buf = rest;

for (rgba, idx) in pixels.chunks_exact_mut(N_CHANNELS).zip(self.buffer.iter().copied().take(bytes_decoded)) {
let plte_offset = PLTE_CHANNELS * idx as usize;
if let Some(colors) = palette.get(plte_offset..plte_offset+PLTE_CHANNELS) {
rgba[0] = colors[0];
rgba[1] = colors[1];
rgba[2] = colors[2];
rgba[3] = if let Some(t) = transparent {
if t == idx { 0x00 } else { 0xFF }
} else {
0xFF
};
}
}
},
ColorOutput::Indexed => {
buf = &mut buf[bytes_decoded..];
}
}
if buf.is_empty() {
return Ok(true);
}
},
}
}
}

pub(crate) fn global_palette(&self) -> Option<&[u8]> {
self.global_palette.as_deref()
}

pub(crate) fn set_global_palette(&mut self, palette: Vec<u8>) {
self.global_palette = if !palette.is_empty() {
Some(palette)
} else {
None
};
}

/// Applies deinterlacing
///
/// Set `frame.interlaced = false` afterwards if you're putting the buffer back into the `Frame`
pub(crate) fn read_into_buffer(&mut self, frame: &Frame<'_>, buf: &mut [u8], data_callback: FillBufferCallback<'_>) -> Result<(), DecodingError> {
if frame.interlaced {
let width = self.line_length(frame);
let height = frame.height as usize;
for row in (InterlaceIterator { len: height, next: 0, pass: 0 }) {
let start = row * width;
// Handle a too-small buffer without panicking
let line = buf.get_mut(start .. start + width).ok_or_else(|| DecodingError::format("buffer too small"))?;
if !self.fill_buffer(frame, line, data_callback)? {
return Err(DecodingError::format("image truncated"));
}
}
} else {
let buf = buf.get_mut(..self.buffer_size(frame)).ok_or_else(|| DecodingError::format("buffer too small"))?;
if !self.fill_buffer(frame, buf, data_callback)? {
return Err(DecodingError::format("image truncated"));
}
};
Ok(())
}
}

struct InterlaceIterator {
len: usize,
next: usize,
pass: usize,
}

impl iter::Iterator for InterlaceIterator {
type Item = usize;

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.len == 0 {
return None;
}
// although the pass never goes out of bounds thanks to len==0,
// the optimizer doesn't see it. get()? avoids costlier panicking code.
let mut next = self.next + *[8, 8, 4, 2].get(self.pass)?;
while next >= self.len {
debug_assert!(self.pass < 4);
next = *[4, 2, 1, 0].get(self.pass)?;
self.pass += 1;
}
mem::swap(&mut next, &mut self.next);
Some(next)
}
}

#[cfg(test)]
mod test {
use super::InterlaceIterator;

#[test]
fn test_interlace_iterator() {
for &(len, expect) in &[
(0, &[][..]),
(1, &[0][..]),
(2, &[0, 1][..]),
(3, &[0, 2, 1][..]),
(4, &[0, 2, 1, 3][..]),
(5, &[0, 4, 2, 1, 3][..]),
(6, &[0, 4, 2, 1, 3, 5][..]),
(7, &[0, 4, 2, 6, 1, 3, 5][..]),
(8, &[0, 4, 2, 6, 1, 3, 5, 7][..]),
(9, &[0, 8, 4, 2, 6, 1, 3, 5, 7][..]),
(10, &[0, 8, 4, 2, 6, 1, 3, 5, 7, 9][..]),
(11, &[0, 8, 4, 2, 6, 10, 1, 3, 5, 7, 9][..]),
(12, &[0, 8, 4, 2, 6, 10, 1, 3, 5, 7, 9, 11][..]),
(13, &[0, 8, 4, 12, 2, 6, 10, 1, 3, 5, 7, 9, 11][..]),
(14, &[0, 8, 4, 12, 2, 6, 10, 1, 3, 5, 7, 9, 11, 13][..]),
(15, &[0, 8, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13][..]),
(16, &[0, 8, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13, 15][..]),
(17, &[0, 8, 16, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13, 15][..]),
] {
let iter = InterlaceIterator { len, next: 0, pass: 0 };
let lines = iter.collect::<Vec<_>>();
assert_eq!(lines, expect);
}
}
}
Loading

0 comments on commit db12002

Please sign in to comment.