-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #177 from kornelski/parallel-reencoder
Separate LZW decoder for fully parallel GIF re-encoding
- Loading branch information
Showing
7 changed files
with
433 additions
and
211 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
//! Reencodes GIF in parallel | ||
use gif::streaming_decoder::FrameDecoder; | ||
use gif::DecodeOptions; | ||
use rayon::iter::ParallelBridge; | ||
use rayon::iter::ParallelIterator; | ||
use std::env; | ||
use std::fs::File; | ||
use std::io::BufWriter; | ||
use std::path::PathBuf; | ||
|
||
fn main() -> Result<(), Box<dyn std::error::Error>> { | ||
let input_path = PathBuf::from( | ||
env::args_os() | ||
.nth(1) | ||
.ok_or("Specify a GIF path as the first argument")?, | ||
); | ||
|
||
let input = std::fs::read(&input_path)?; | ||
let input_size = input.len(); | ||
|
||
let start = std::time::Instant::now(); | ||
|
||
let mut options = DecodeOptions::new(); | ||
options.skip_frame_decoding(true); // This gives LZW frames | ||
|
||
let decoder = options.read_info(std::io::Cursor::new(input))?; | ||
let repeat = decoder.repeat(); | ||
let screen_width = decoder.width(); | ||
let screen_height = decoder.height(); | ||
let global_pal = decoder.global_palette().unwrap_or_default().to_vec(); | ||
|
||
let output_file = format!( | ||
"{}-reencoded.gif", | ||
input_path.file_stem().unwrap().to_str().unwrap() | ||
); | ||
let output = BufWriter::new(File::create(output_file)?); | ||
let mut encoder = gif::Encoder::new(output, screen_width, screen_height, &global_pal)?; | ||
encoder.set_repeat(repeat)?; | ||
|
||
let (send, recv) = std::sync::mpsc::channel(); | ||
|
||
decoder.into_iter().enumerate().par_bridge().try_for_each(move |(frame_number, frame)| { | ||
let mut frame = frame?; | ||
FrameDecoder::new(DecodeOptions::new()) | ||
.decode_lzw_encoded_frame(&mut frame) | ||
.unwrap(); | ||
// frame is now pixels | ||
frame.make_lzw_pre_encoded(); | ||
// frame is now LZW again, re-encoded | ||
send.send((frame_number, frame)).unwrap(); | ||
Ok::<_, gif::DecodingError>(()) | ||
})?; | ||
|
||
// Decoding and encoding can happen in parallel, but writing to the GIF file is sequential | ||
let mut next_frame_number = 0; | ||
let mut frames_to_process = Vec::new(); | ||
for (frame_number, frame) in recv { | ||
// frames can arrive in any order, since they're processed in parallel, | ||
// so they have to be stored in a queue | ||
frames_to_process.push((frame_number, frame)); | ||
while let Some(index) = frames_to_process.iter().position(|&(num, _)| num == next_frame_number) { | ||
let frame = frames_to_process.remove(index).1; | ||
encoder.write_lzw_pre_encoded_frame(&frame)?; | ||
next_frame_number += 1; | ||
} | ||
} | ||
encoder.into_inner()?; | ||
|
||
let seconds = start.elapsed().as_millis() as f64 / 1000.; | ||
let rate = (input_size / 1024 / 1024) as f64 / seconds; | ||
|
||
eprintln!("Finished in {seconds:0.2}s, {rate:0.0}MiB/s {}", if cfg!(debug_assertions) { ". Run with --release for more speed." } else { "" }); | ||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,244 @@ | ||
use std::borrow::Cow; | ||
use std::io; | ||
use std::mem; | ||
use std::iter; | ||
use crate::common::Frame; | ||
use crate::MemoryLimit; | ||
|
||
use super::decoder::{ | ||
PLTE_CHANNELS, DecodingError, OutputBuffer | ||
}; | ||
|
||
pub(crate) const N_CHANNELS: usize = 4; | ||
|
||
/// Output mode for the image data | ||
#[derive(Clone, Copy, Debug, PartialEq)] | ||
#[repr(u8)] | ||
pub enum ColorOutput { | ||
/// The decoder expands the image data to 32bit RGBA. | ||
/// This affects: | ||
/// | ||
/// - The buffer buffer of the `Frame` returned by [`Decoder::read_next_frame`]. | ||
/// - `Decoder::fill_buffer`, `Decoder::buffer_size` and `Decoder::line_length`. | ||
RGBA = 0, | ||
/// The decoder returns the raw indexed data. | ||
Indexed = 1, | ||
} | ||
|
||
pub(crate) type FillBufferCallback<'a> = &'a mut dyn FnMut(&mut OutputBuffer<'_>) -> Result<usize, DecodingError>; | ||
|
||
/// Deinterlaces and expands to RGBA if needed | ||
pub(crate) struct PixelConverter { | ||
memory_limit: MemoryLimit, | ||
color_output: ColorOutput, | ||
buffer: Vec<u8>, | ||
global_palette: Option<Vec<u8>>, | ||
} | ||
|
||
impl PixelConverter { | ||
pub(crate) fn new(color_output: ColorOutput, memory_limit: MemoryLimit) -> Self { | ||
Self { | ||
memory_limit, | ||
color_output, | ||
buffer: Vec::new(), | ||
global_palette: None, | ||
} | ||
} | ||
|
||
pub(crate) fn check_buffer_size(&mut self, frame: &Frame<'_>) -> Result<usize, DecodingError> { | ||
let pixel_bytes = self.memory_limit | ||
.buffer_size(self.color_output, frame.width, frame.height) | ||
.ok_or_else(|| io::Error::new(io::ErrorKind::OutOfMemory, "image is too large"))?; | ||
|
||
debug_assert_eq!( | ||
pixel_bytes, self.buffer_size(frame), | ||
"Checked computation diverges from required buffer size" | ||
); | ||
Ok(pixel_bytes) | ||
} | ||
|
||
#[inline] | ||
pub(crate) fn read_frame(&mut self, frame: &mut Frame<'_>, data_callback: FillBufferCallback<'_>) -> Result<(), DecodingError> { | ||
let pixel_bytes = self.check_buffer_size(frame)?; | ||
let mut vec = match mem::replace(&mut frame.buffer, Cow::Borrowed(&[])) { | ||
// reuse buffer if possible without reallocating | ||
Cow::Owned(mut vec) if vec.capacity() >= pixel_bytes => { | ||
vec.resize(pixel_bytes, 0); | ||
vec | ||
}, | ||
// resizing would realloc anyway, and 0-init is faster than a copy | ||
_ => vec![0; pixel_bytes], | ||
}; | ||
self.read_into_buffer(frame, &mut vec, data_callback)?; | ||
frame.buffer = Cow::Owned(vec); | ||
frame.interlaced = false; | ||
Ok(()) | ||
} | ||
|
||
pub(crate) fn buffer_size(&self, frame: &Frame<'_>) -> usize { | ||
self.line_length(frame) * frame.height as usize | ||
} | ||
|
||
pub(crate) fn line_length(&self, frame: &Frame<'_>) -> usize { | ||
use self::ColorOutput::*; | ||
match self.color_output { | ||
RGBA => frame.width as usize * N_CHANNELS, | ||
Indexed => frame.width as usize, | ||
} | ||
} | ||
|
||
/// Use `read_into_buffer` to deinterlace | ||
#[inline(never)] | ||
pub(crate) fn fill_buffer(&mut self, current_frame: &Frame<'_>, mut buf: &mut [u8], data_callback: FillBufferCallback<'_>) -> Result<bool, DecodingError> { | ||
loop { | ||
let decode_into = match self.color_output { | ||
// When decoding indexed data, LZW can write the pixels directly | ||
ColorOutput::Indexed => &mut buf[..], | ||
// When decoding RGBA, the pixel data will be expanded by a factor of 4, | ||
// and it's simpler to decode indexed pixels to another buffer first | ||
ColorOutput::RGBA => { | ||
let buffer_size = buf.len() / N_CHANNELS; | ||
if buffer_size == 0 { | ||
return Err(DecodingError::format("odd-sized buffer")); | ||
} | ||
if self.buffer.len() < buffer_size { | ||
self.buffer.resize(buffer_size, 0); | ||
} | ||
&mut self.buffer[..buffer_size] | ||
} | ||
}; | ||
match data_callback(&mut OutputBuffer::Slice(decode_into))? { | ||
0 => return Ok(false), | ||
bytes_decoded => { | ||
match self.color_output { | ||
ColorOutput::RGBA => { | ||
let transparent = current_frame.transparent; | ||
let palette: &[u8] = current_frame.palette.as_deref() | ||
.or(self.global_palette.as_deref()) | ||
.unwrap_or_default(); // next_frame_info already checked it won't happen | ||
|
||
let (pixels, rest) = buf.split_at_mut(bytes_decoded * N_CHANNELS); | ||
buf = rest; | ||
|
||
for (rgba, idx) in pixels.chunks_exact_mut(N_CHANNELS).zip(self.buffer.iter().copied().take(bytes_decoded)) { | ||
let plte_offset = PLTE_CHANNELS * idx as usize; | ||
if let Some(colors) = palette.get(plte_offset..plte_offset+PLTE_CHANNELS) { | ||
rgba[0] = colors[0]; | ||
rgba[1] = colors[1]; | ||
rgba[2] = colors[2]; | ||
rgba[3] = if let Some(t) = transparent { | ||
if t == idx { 0x00 } else { 0xFF } | ||
} else { | ||
0xFF | ||
}; | ||
} | ||
} | ||
}, | ||
ColorOutput::Indexed => { | ||
buf = &mut buf[bytes_decoded..]; | ||
} | ||
} | ||
if buf.is_empty() { | ||
return Ok(true); | ||
} | ||
}, | ||
} | ||
} | ||
} | ||
|
||
pub(crate) fn global_palette(&self) -> Option<&[u8]> { | ||
self.global_palette.as_deref() | ||
} | ||
|
||
pub(crate) fn set_global_palette(&mut self, palette: Vec<u8>) { | ||
self.global_palette = if !palette.is_empty() { | ||
Some(palette) | ||
} else { | ||
None | ||
}; | ||
} | ||
|
||
/// Applies deinterlacing | ||
/// | ||
/// Set `frame.interlaced = false` afterwards if you're putting the buffer back into the `Frame` | ||
pub(crate) fn read_into_buffer(&mut self, frame: &Frame<'_>, buf: &mut [u8], data_callback: FillBufferCallback<'_>) -> Result<(), DecodingError> { | ||
if frame.interlaced { | ||
let width = self.line_length(frame); | ||
let height = frame.height as usize; | ||
for row in (InterlaceIterator { len: height, next: 0, pass: 0 }) { | ||
let start = row * width; | ||
// Handle a too-small buffer without panicking | ||
let line = buf.get_mut(start .. start + width).ok_or_else(|| DecodingError::format("buffer too small"))?; | ||
if !self.fill_buffer(frame, line, data_callback)? { | ||
return Err(DecodingError::format("image truncated")); | ||
} | ||
} | ||
} else { | ||
let buf = buf.get_mut(..self.buffer_size(frame)).ok_or_else(|| DecodingError::format("buffer too small"))?; | ||
if !self.fill_buffer(frame, buf, data_callback)? { | ||
return Err(DecodingError::format("image truncated")); | ||
} | ||
}; | ||
Ok(()) | ||
} | ||
} | ||
|
||
struct InterlaceIterator { | ||
len: usize, | ||
next: usize, | ||
pass: usize, | ||
} | ||
|
||
impl iter::Iterator for InterlaceIterator { | ||
type Item = usize; | ||
|
||
#[inline] | ||
fn next(&mut self) -> Option<Self::Item> { | ||
if self.len == 0 { | ||
return None; | ||
} | ||
// although the pass never goes out of bounds thanks to len==0, | ||
// the optimizer doesn't see it. get()? avoids costlier panicking code. | ||
let mut next = self.next + *[8, 8, 4, 2].get(self.pass)?; | ||
while next >= self.len { | ||
debug_assert!(self.pass < 4); | ||
next = *[4, 2, 1, 0].get(self.pass)?; | ||
self.pass += 1; | ||
} | ||
mem::swap(&mut next, &mut self.next); | ||
Some(next) | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
mod test { | ||
use super::InterlaceIterator; | ||
|
||
#[test] | ||
fn test_interlace_iterator() { | ||
for &(len, expect) in &[ | ||
(0, &[][..]), | ||
(1, &[0][..]), | ||
(2, &[0, 1][..]), | ||
(3, &[0, 2, 1][..]), | ||
(4, &[0, 2, 1, 3][..]), | ||
(5, &[0, 4, 2, 1, 3][..]), | ||
(6, &[0, 4, 2, 1, 3, 5][..]), | ||
(7, &[0, 4, 2, 6, 1, 3, 5][..]), | ||
(8, &[0, 4, 2, 6, 1, 3, 5, 7][..]), | ||
(9, &[0, 8, 4, 2, 6, 1, 3, 5, 7][..]), | ||
(10, &[0, 8, 4, 2, 6, 1, 3, 5, 7, 9][..]), | ||
(11, &[0, 8, 4, 2, 6, 10, 1, 3, 5, 7, 9][..]), | ||
(12, &[0, 8, 4, 2, 6, 10, 1, 3, 5, 7, 9, 11][..]), | ||
(13, &[0, 8, 4, 12, 2, 6, 10, 1, 3, 5, 7, 9, 11][..]), | ||
(14, &[0, 8, 4, 12, 2, 6, 10, 1, 3, 5, 7, 9, 11, 13][..]), | ||
(15, &[0, 8, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13][..]), | ||
(16, &[0, 8, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13, 15][..]), | ||
(17, &[0, 8, 16, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13, 15][..]), | ||
] { | ||
let iter = InterlaceIterator { len, next: 0, pass: 0 }; | ||
let lines = iter.collect::<Vec<_>>(); | ||
assert_eq!(lines, expect); | ||
} | ||
} | ||
} |
Oops, something went wrong.