diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index b53bbe7abc8..009a7b7a507 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -120,8 +120,41 @@ impl OffsetSizeTrait for i64 { /// (offsets[i], │ ListArray (Array) /// offsets[i+1]) └ ─ ─ ─ ─ ─ ─ ┘ │ /// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ +/// ``` +/// +/// # Slicing +/// +/// Slicing a `ListArray` creates a new `ListArray` without copying any data, +/// but this means the [`Self::values`] and [`Self::offsets`] may have "unused" data /// +/// For example, calling `slice(1, 3)` on the `ListArray` in the above example +/// would result in the following. Note /// +/// 1. `Values` array is unchanged +/// 2. `Offsets` do not start at `0`, nor cover all values in the Values array. +/// +/// ```text +/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ +/// ┌ ─ ─ ─ ─ ─ ─ ┐ │ ╔═══╗ +/// │ ╔═══╗ ╔═══╗ ║ ║ Not used +/// │ ║ 1 ║ ║ A ║ │ 0 │ ╚═══╝ +/// ┌─────────────┐ ┌───────┐ │ ┌───┐ ┌───┐ ╠═══╣ ╠═══╣ +/// │ [] (empty) │ │ (3,3) │ │ 1 │ │ 3 │ │ ║ 1 ║ ║ B ║ │ 1 │ +/// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ╠═══╣ ╠═══╣ +/// │ NULL │ │ (3,4) │ │ 0 │ │ 3 │ │ ║ 1 ║ ║ C ║ │ 2 │ +/// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ╠───╣ ╠───╣ +/// │ [D] │ │ (4,5) │ │ 1 │ │ 4 │ │ │ 0 │ │ ? │ │ 3 │ +/// └─────────────┘ └───────┘ │ └───┘ ├───┤ ├───┤ ├───┤ +/// │ 5 │ │ │ 1 │ │ D │ │ 4 │ +/// │ └───┘ ├───┤ ├───┤ +/// │ │ 0 │ │ ? │ │ 5 │ +/// │ Validity ╠═══╣ ╠═══╣ +/// Logical Logical (nulls) Offsets │ ║ 1 ║ ║ F ║ │ 6 │ +/// Values Offsets │ ╚═══╝ ╚═══╝ +/// │ Values │ │ +/// (offsets[i], │ ListArray (Array) +/// offsets[i+1]) └ ─ ─ ─ ─ ─ ─ ┘ │ +/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ /// ``` /// /// [`StringArray`]: crate::array::StringArray @@ -263,13 +296,22 @@ impl GenericListArray { /// Returns a reference to the offsets of this list /// /// Unlike [`Self::value_offsets`] this returns the [`OffsetBuffer`] - /// allowing for zero-copy cloning + /// allowing for zero-copy cloning. + /// + /// Notes: The `offsets` may not start at 0 and may not cover all values in + /// [`Self::values`]. This can happen when the list array was sliced via + /// [`Self::slice`]. See documentation for [`Self`] for more details. #[inline] pub fn offsets(&self) -> &OffsetBuffer { &self.value_offsets } /// Returns a reference to the values of this list + /// + /// Note: The list array may not refer to all values in the `values` array. + /// For example if the list array was sliced via [`Self::slice`] values will + /// still contain values both before and after the slice. See documentation + /// for [`Self`] for more details. #[inline] pub fn values(&self) -> &ArrayRef { &self.values @@ -296,7 +338,9 @@ impl GenericListArray { self.values.slice(start, end - start) } - /// Returns the offset values in the offsets buffer + /// Returns the offset values in the offsets buffer. + /// + /// See [`Self::offsets`] for more details. #[inline] pub fn value_offsets(&self) -> &[OffsetSize] { &self.value_offsets @@ -325,6 +369,10 @@ impl GenericListArray { } /// Returns a zero-copy slice of this array with the indicated offset and length. + /// + /// Notes: this method does *NOT* slice the underlying values array or modify + /// the values in the offsets buffer. See [`Self::values`] and + /// [`Self::offsets`] for more information. pub fn slice(&self, offset: usize, length: usize) -> Self { Self { data_type: self.data_type.clone(), @@ -556,12 +604,12 @@ impl std::fmt::Debug for GenericListArray; /// A [`GenericListArray`] of variable size lists, storing offsets as `i64`. /// -// See [`LargeListBuilder`](crate::builder::LargeListBuilder) for how to construct a [`LargeListArray`] +/// See [`LargeListBuilder`](crate::builder::LargeListBuilder) for how to construct a [`LargeListArray`] pub type LargeListArray = GenericListArray; #[cfg(test)]