Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Separate LZW decoder for fully parallel GIF re-encoding #177

Merged
merged 4 commits into from
Jan 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ color_quant = { version = "1.1", optional = true }
glob = "0.3"
criterion = "0.5.1"
png = "0.17.10"
rayon = "1.8.0" # for parallel reencoding example

[features]
default = ["raii_no_panic", "std", "color_quant"]
Expand Down
2 changes: 2 additions & 0 deletions Changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ Features:
It works together with `write_lzw_pre_encoded_frame` for quick rewriting of GIF files.
- Added support pre-allocated `Vec`s in `from_palette_pixels`
- Added ability to recover the `io::Read`er after decoding.
- Added support for decompressing `Frame.buffer` with LZW data,
which enables fully parallel GIF re-encoding (see examples/parallel.rs),

Optimization:
- Less buffering, copying, and lower peak memory usage.
Expand Down
75 changes: 75 additions & 0 deletions examples/parallel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
//! Reencodes GIF in parallel

use gif::streaming_decoder::FrameDecoder;
use gif::DecodeOptions;
use rayon::iter::ParallelBridge;
use rayon::iter::ParallelIterator;
use std::env;
use std::fs::File;
use std::io::BufWriter;
use std::path::PathBuf;

fn main() -> Result<(), Box<dyn std::error::Error>> {
let input_path = PathBuf::from(
env::args_os()
.nth(1)
.ok_or("Specify a GIF path as the first argument")?,
);

let input = std::fs::read(&input_path)?;
let input_size = input.len();

let start = std::time::Instant::now();

let mut options = DecodeOptions::new();
options.skip_frame_decoding(true); // This gives LZW frames

let decoder = options.read_info(std::io::Cursor::new(input))?;
let repeat = decoder.repeat();
let screen_width = decoder.width();
let screen_height = decoder.height();
let global_pal = decoder.global_palette().unwrap_or_default().to_vec();

let output_file = format!(
"{}-reencoded.gif",
input_path.file_stem().unwrap().to_str().unwrap()
);
let output = BufWriter::new(File::create(output_file)?);
let mut encoder = gif::Encoder::new(output, screen_width, screen_height, &global_pal)?;
encoder.set_repeat(repeat)?;

let (send, recv) = std::sync::mpsc::channel();

decoder.into_iter().enumerate().par_bridge().try_for_each(move |(frame_number, frame)| {
let mut frame = frame?;
FrameDecoder::new(DecodeOptions::new())
.decode_lzw_encoded_frame(&mut frame)
.unwrap();
// frame is now pixels
frame.make_lzw_pre_encoded();
// frame is now LZW again, re-encoded
send.send((frame_number, frame)).unwrap();
Ok::<_, gif::DecodingError>(())
})?;

// Decoding and encoding can happen in parallel, but writing to the GIF file is sequential
let mut next_frame_number = 0;
let mut frames_to_process = Vec::new();
for (frame_number, frame) in recv {
// frames can arrive in any order, since they're processed in parallel,
// so they have to be stored in a queue
frames_to_process.push((frame_number, frame));
while let Some(index) = frames_to_process.iter().position(|&(num, _)| num == next_frame_number) {
let frame = frames_to_process.remove(index).1;
encoder.write_lzw_pre_encoded_frame(&frame)?;
next_frame_number += 1;
}
}
encoder.into_inner()?;

let seconds = start.elapsed().as_millis() as f64 / 1000.;
let rate = (input_size / 1024 / 1024) as f64 / seconds;

eprintln!("Finished in {seconds:0.2}s, {rate:0.0}MiB/s {}", if cfg!(debug_assertions) { ". Run with --release for more speed." } else { "" });
Ok(())
}
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ pub use crate::encoder::{Encoder, ExtensionData, Repeat, EncodingError};
/// Low-level, advanced decoder. Prefer [`Decoder`] instead, which can stream frames too.
pub mod streaming_decoder {
pub use crate::common::Block;
pub use crate::reader::{StreamingDecoder, OutputBuffer, Decoded, FrameDataType};
pub use crate::reader::{Decoded, FrameDataType, FrameDecoder, OutputBuffer, StreamingDecoder};
}

macro_rules! insert_as_doc {
Expand Down
244 changes: 244 additions & 0 deletions src/reader/converter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
use std::borrow::Cow;
use std::io;
use std::mem;
use std::iter;
use crate::common::Frame;
use crate::MemoryLimit;

use super::decoder::{
PLTE_CHANNELS, DecodingError, OutputBuffer
};

pub(crate) const N_CHANNELS: usize = 4;

/// Output mode for the image data
#[derive(Clone, Copy, Debug, PartialEq)]
#[repr(u8)]
pub enum ColorOutput {
/// The decoder expands the image data to 32bit RGBA.
/// This affects:
///
/// - The buffer buffer of the `Frame` returned by [`Decoder::read_next_frame`].
/// - `Decoder::fill_buffer`, `Decoder::buffer_size` and `Decoder::line_length`.
RGBA = 0,
/// The decoder returns the raw indexed data.
Indexed = 1,
}

pub(crate) type FillBufferCallback<'a> = &'a mut dyn FnMut(&mut OutputBuffer<'_>) -> Result<usize, DecodingError>;

/// Deinterlaces and expands to RGBA if needed
pub(crate) struct PixelConverter {
memory_limit: MemoryLimit,
color_output: ColorOutput,
buffer: Vec<u8>,
global_palette: Option<Vec<u8>>,
}

impl PixelConverter {
pub(crate) fn new(color_output: ColorOutput, memory_limit: MemoryLimit) -> Self {
Self {
memory_limit,
color_output,
buffer: Vec::new(),
global_palette: None,
}
}

pub(crate) fn check_buffer_size(&mut self, frame: &Frame<'_>) -> Result<usize, DecodingError> {
let pixel_bytes = self.memory_limit
.buffer_size(self.color_output, frame.width, frame.height)
.ok_or_else(|| io::Error::new(io::ErrorKind::OutOfMemory, "image is too large"))?;

debug_assert_eq!(
pixel_bytes, self.buffer_size(frame),
"Checked computation diverges from required buffer size"
);
Ok(pixel_bytes)
}

#[inline]
pub(crate) fn read_frame(&mut self, frame: &mut Frame<'_>, data_callback: FillBufferCallback<'_>) -> Result<(), DecodingError> {
let pixel_bytes = self.check_buffer_size(frame)?;
let mut vec = match mem::replace(&mut frame.buffer, Cow::Borrowed(&[])) {
// reuse buffer if possible without reallocating
Cow::Owned(mut vec) if vec.capacity() >= pixel_bytes => {
vec.resize(pixel_bytes, 0);
vec
},
// resizing would realloc anyway, and 0-init is faster than a copy
_ => vec![0; pixel_bytes],
};
self.read_into_buffer(frame, &mut vec, data_callback)?;
frame.buffer = Cow::Owned(vec);
frame.interlaced = false;
Ok(())
}

pub(crate) fn buffer_size(&self, frame: &Frame<'_>) -> usize {
self.line_length(frame) * frame.height as usize
}

pub(crate) fn line_length(&self, frame: &Frame<'_>) -> usize {
use self::ColorOutput::*;
match self.color_output {
RGBA => frame.width as usize * N_CHANNELS,
Indexed => frame.width as usize,
}
}

/// Use `read_into_buffer` to deinterlace
#[inline(never)]
pub(crate) fn fill_buffer(&mut self, current_frame: &Frame<'_>, mut buf: &mut [u8], data_callback: FillBufferCallback<'_>) -> Result<bool, DecodingError> {
loop {
let decode_into = match self.color_output {
// When decoding indexed data, LZW can write the pixels directly
ColorOutput::Indexed => &mut buf[..],
// When decoding RGBA, the pixel data will be expanded by a factor of 4,
// and it's simpler to decode indexed pixels to another buffer first
ColorOutput::RGBA => {
let buffer_size = buf.len() / N_CHANNELS;
if buffer_size == 0 {
return Err(DecodingError::format("odd-sized buffer"));
}
if self.buffer.len() < buffer_size {
self.buffer.resize(buffer_size, 0);
}
&mut self.buffer[..buffer_size]
}
};
match data_callback(&mut OutputBuffer::Slice(decode_into))? {
0 => return Ok(false),
bytes_decoded => {
match self.color_output {
ColorOutput::RGBA => {
let transparent = current_frame.transparent;
let palette: &[u8] = current_frame.palette.as_deref()
.or(self.global_palette.as_deref())
.unwrap_or_default(); // next_frame_info already checked it won't happen

let (pixels, rest) = buf.split_at_mut(bytes_decoded * N_CHANNELS);
buf = rest;

for (rgba, idx) in pixels.chunks_exact_mut(N_CHANNELS).zip(self.buffer.iter().copied().take(bytes_decoded)) {
let plte_offset = PLTE_CHANNELS * idx as usize;
if let Some(colors) = palette.get(plte_offset..plte_offset+PLTE_CHANNELS) {
rgba[0] = colors[0];
rgba[1] = colors[1];
rgba[2] = colors[2];
rgba[3] = if let Some(t) = transparent {
if t == idx { 0x00 } else { 0xFF }
} else {
0xFF
};
}
}
},
ColorOutput::Indexed => {
buf = &mut buf[bytes_decoded..];
}
}
if buf.is_empty() {
return Ok(true);
}
},
}
}
}

pub(crate) fn global_palette(&self) -> Option<&[u8]> {
self.global_palette.as_deref()
}

pub(crate) fn set_global_palette(&mut self, palette: Vec<u8>) {
self.global_palette = if !palette.is_empty() {
Some(palette)
} else {
None
};
}

/// Applies deinterlacing
///
/// Set `frame.interlaced = false` afterwards if you're putting the buffer back into the `Frame`
pub(crate) fn read_into_buffer(&mut self, frame: &Frame<'_>, buf: &mut [u8], data_callback: FillBufferCallback<'_>) -> Result<(), DecodingError> {
if frame.interlaced {
let width = self.line_length(frame);
let height = frame.height as usize;
for row in (InterlaceIterator { len: height, next: 0, pass: 0 }) {
let start = row * width;
// Handle a too-small buffer without panicking
let line = buf.get_mut(start .. start + width).ok_or_else(|| DecodingError::format("buffer too small"))?;
if !self.fill_buffer(frame, line, data_callback)? {
return Err(DecodingError::format("image truncated"));
}
}
} else {
let buf = buf.get_mut(..self.buffer_size(frame)).ok_or_else(|| DecodingError::format("buffer too small"))?;
if !self.fill_buffer(frame, buf, data_callback)? {
return Err(DecodingError::format("image truncated"));
}
};
Ok(())
}
}

struct InterlaceIterator {
len: usize,
next: usize,
pass: usize,
}

impl iter::Iterator for InterlaceIterator {
type Item = usize;

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.len == 0 {
return None;
}
// although the pass never goes out of bounds thanks to len==0,
// the optimizer doesn't see it. get()? avoids costlier panicking code.
let mut next = self.next + *[8, 8, 4, 2].get(self.pass)?;
while next >= self.len {
debug_assert!(self.pass < 4);
next = *[4, 2, 1, 0].get(self.pass)?;
self.pass += 1;
}
mem::swap(&mut next, &mut self.next);
Some(next)
}
}

#[cfg(test)]
mod test {
use super::InterlaceIterator;

#[test]
fn test_interlace_iterator() {
for &(len, expect) in &[
(0, &[][..]),
(1, &[0][..]),
(2, &[0, 1][..]),
(3, &[0, 2, 1][..]),
(4, &[0, 2, 1, 3][..]),
(5, &[0, 4, 2, 1, 3][..]),
(6, &[0, 4, 2, 1, 3, 5][..]),
(7, &[0, 4, 2, 6, 1, 3, 5][..]),
(8, &[0, 4, 2, 6, 1, 3, 5, 7][..]),
(9, &[0, 8, 4, 2, 6, 1, 3, 5, 7][..]),
(10, &[0, 8, 4, 2, 6, 1, 3, 5, 7, 9][..]),
(11, &[0, 8, 4, 2, 6, 10, 1, 3, 5, 7, 9][..]),
(12, &[0, 8, 4, 2, 6, 10, 1, 3, 5, 7, 9, 11][..]),
(13, &[0, 8, 4, 12, 2, 6, 10, 1, 3, 5, 7, 9, 11][..]),
(14, &[0, 8, 4, 12, 2, 6, 10, 1, 3, 5, 7, 9, 11, 13][..]),
(15, &[0, 8, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13][..]),
(16, &[0, 8, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13, 15][..]),
(17, &[0, 8, 16, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13, 15][..]),
] {
let iter = InterlaceIterator { len, next: 0, pass: 0 };
let lines = iter.collect::<Vec<_>>();
assert_eq!(lines, expect);
}
}
}
Loading
Loading