diff --git a/Cargo.toml b/Cargo.toml index c51371a..7e6ea25 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,3 +51,6 @@ harness = false name = "get_path" harness = false +[[bench]] +name = "strip_nulls" +harness = false diff --git a/benches/strip_nulls.rs b/benches/strip_nulls.rs new file mode 100644 index 0000000..f0944e6 --- /dev/null +++ b/benches/strip_nulls.rs @@ -0,0 +1,76 @@ +// Copyright 2024 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::{fs, io::Read}; + +use criterion::{criterion_group, criterion_main, Criterion}; +use jsonb::{from_slice, strip_nulls, Value}; + +fn read(file: &str) -> Vec { + let mut f = fs::File::open(file).unwrap(); + let mut data = vec![]; + f.read_to_end(&mut data).unwrap(); + data +} + +fn strip_nulls_deser(data: &[u8]) { + let mut buf = Vec::new(); + let mut json = from_slice(data).unwrap(); + strip_value_nulls(&mut json); + json.write_to_vec(&mut buf); + assert!(!buf.is_empty()); +} + +fn strip_value_nulls(val: &mut Value<'_>) { + match val { + Value::Array(arr) => { + for v in arr { + strip_value_nulls(v); + } + } + Value::Object(ref mut obj) => { + for (_, v) in obj.iter_mut() { + strip_value_nulls(v); + } + obj.retain(|_, v| !matches!(v, Value::Null)); + } + _ => {} + } +} + +fn strip_nulls_fast(data: &[u8]) { + let mut buf = Vec::new(); + strip_nulls(data, &mut buf).unwrap(); + assert!(!buf.is_empty()); +} + +fn add_benchmark(c: &mut Criterion) { + let paths = fs::read_dir("./data/").unwrap(); + for path in paths { + let file = format!("{}", path.unwrap().path().display()); + let bytes = read(&file); + let json = from_slice(&bytes).unwrap().to_vec(); + + c.bench_function(&format!("strip_nulls_deser[{}]", file), |b| { + b.iter(|| strip_nulls_deser(&json)); + }); + + c.bench_function(&format!("strip_nulls_fast[{}]", file), |b| { + b.iter(|| strip_nulls_fast(&json)); + }); + } +} + +criterion_group!(benches, add_benchmark); +criterion_main!(benches); diff --git a/src/builder.rs b/src/builder.rs new file mode 100644 index 0000000..cc4145a --- /dev/null +++ b/src/builder.rs @@ -0,0 +1,149 @@ +// Copyright 2024 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::BTreeMap; + +use byteorder::{BigEndian, WriteBytesExt}; + +use crate::{ + constants::{ARRAY_CONTAINER_TAG, OBJECT_CONTAINER_TAG}, + jentry::JEntry, +}; + +enum Entry<'a> { + ArrayBuilder(ArrayBuilder<'a>), + ObjectBuilder(ObjectBuilder<'a>), + Raw(JEntry, &'a [u8]), +} + +pub(crate) struct ArrayBuilder<'a> { + entries: Vec>, +} + +impl<'a> ArrayBuilder<'a> { + pub(crate) fn new(capacity: usize) -> Self { + Self { + entries: Vec::with_capacity(capacity), + } + } + + pub(crate) fn push_raw(&mut self, jentry: JEntry, data: &'a [u8]) { + self.entries.push(Entry::Raw(jentry, data)); + } + + pub(crate) fn push_array(&mut self, builder: ArrayBuilder<'a>) { + self.entries.push(Entry::ArrayBuilder(builder)); + } + + pub(crate) fn push_object(&mut self, builder: ObjectBuilder<'a>) { + self.entries.push(Entry::ObjectBuilder(builder)); + } + + pub(crate) fn len(&self) -> usize { + self.entries.len() + } + + pub(crate) fn build_into(self, buf: &mut Vec) { + let header = ARRAY_CONTAINER_TAG | self.entries.len() as u32; + buf.write_u32::(header).unwrap(); + + let mut jentry_index = reserve_jentries(buf, self.entries.len() * 4); + + for entry in self.entries.into_iter() { + let jentry = write_entry(buf, entry); + replace_jentry(buf, jentry, &mut jentry_index); + } + } +} + +pub(crate) struct ObjectBuilder<'a> { + entries: BTreeMap<&'a str, Entry<'a>>, +} + +impl<'a> ObjectBuilder<'a> { + pub(crate) fn new() -> Self { + Self { + entries: BTreeMap::new(), + } + } + + pub(crate) fn push_raw(&mut self, key: &'a str, jentry: JEntry, data: &'a [u8]) { + self.entries.insert(key, Entry::Raw(jentry, data)); + } + + pub(crate) fn push_array(&mut self, key: &'a str, builder: ArrayBuilder<'a>) { + self.entries.insert(key, Entry::ArrayBuilder(builder)); + } + + pub(crate) fn push_object(&mut self, key: &'a str, builder: ObjectBuilder<'a>) { + self.entries.insert(key, Entry::ObjectBuilder(builder)); + } + + pub(crate) fn len(&self) -> usize { + self.entries.len() + } + + pub(crate) fn build_into(self, buf: &mut Vec) { + let header = OBJECT_CONTAINER_TAG | self.entries.len() as u32; + buf.write_u32::(header).unwrap(); + + let mut jentry_index = reserve_jentries(buf, self.entries.len() * 8); + + for (key, _) in self.entries.iter() { + let key_len = key.len(); + buf.extend_from_slice(key.as_bytes()); + let jentry = JEntry::make_string_jentry(key_len); + replace_jentry(buf, jentry, &mut jentry_index) + } + + for (_, entry) in self.entries.into_iter() { + let jentry = write_entry(buf, entry); + replace_jentry(buf, jentry, &mut jentry_index); + } + } +} + +fn write_entry(buf: &mut Vec, entry: Entry<'_>) -> JEntry { + match entry { + Entry::ArrayBuilder(builder) => { + let jentry = JEntry::make_container_jentry(builder.len()); + builder.build_into(buf); + jentry + } + Entry::ObjectBuilder(builder) => { + let jentry = JEntry::make_container_jentry(builder.len()); + builder.build_into(buf); + jentry + } + Entry::Raw(jentry, data) => { + buf.extend_from_slice(data); + jentry + } + } +} + +fn reserve_jentries(buf: &mut Vec, len: usize) -> usize { + let old_len = buf.len(); + let new_len = old_len + len; + buf.resize(new_len, 0); + old_len +} + +fn replace_jentry(buf: &mut [u8], jentry: JEntry, jentry_index: &mut usize) { + let jentry_bytes = jentry.encoded().to_be_bytes(); + for (i, b) in jentry_bytes.iter().enumerate() { + buf[*jentry_index + i] = *b; + } + *jentry_index += 4; +} diff --git a/src/functions.rs b/src/functions.rs index cdceed6..935a47a 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -19,9 +19,14 @@ use std::collections::VecDeque; use std::str::from_utf8; use std::str::from_utf8_unchecked; +use crate::builder::ArrayBuilder; +use crate::builder::ObjectBuilder; use crate::constants::*; use crate::error::*; use crate::from_slice; +use crate::iterator::iteate_object_keys; +use crate::iterator::iterate_array; +use crate::iterator::iterate_object_entries; use crate::jentry::JEntry; use crate::jsonpath::JsonPath; use crate::jsonpath::Mode; @@ -1876,13 +1881,150 @@ pub fn traverse_check_string(value: &[u8], func: impl Fn(&[u8]) -> bool) -> bool false } +/// Concatenates two jsonb values. Concatenating two arrays generates an array containing all the elements of each input. +/// Concatenating two objects generates an object containing the union of their keys, taking the second object's value when there are duplicate keys. +/// All other cases are treated by converting a non-array input into a single-element array, and then proceeding as for two arrays. +pub fn concat(left: &[u8], right: &[u8], buf: &mut Vec) -> Result<(), Error> { + if !is_jsonb(left) || !is_jsonb(right) { + let left_val = from_slice(left)?; + let right_val = from_slice(right)?; + let result = concat_values(left_val, right_val); + result.write_to_vec(buf); + return Ok(()); + } + concat_jsonb(left, right, buf) +} + +fn concat_values<'a>(left: Value<'a>, right: Value<'a>) -> Value<'a> { + match (left, right) { + (Value::Object(left), Value::Object(mut right)) => { + let mut result = left; + result.append(&mut right); + Value::Object(result) + } + (Value::Array(left), Value::Array(mut right)) => { + let mut result = left; + result.append(&mut right); + Value::Array(result) + } + (left, Value::Array(mut right)) => { + let mut result = Vec::with_capacity(right.len() + 1); + result.push(left); + result.append(&mut right); + Value::Array(result) + } + (Value::Array(left), right) => { + let mut result = left; + result.push(right); + Value::Array(result) + } + (left, right) => Value::Array(vec![left, right]), + } +} + +fn concat_jsonb(left: &[u8], right: &[u8], buf: &mut Vec) -> Result<(), Error> { + let left_header = read_u32(left, 0)?; + let right_header = read_u32(right, 0)?; + + let left_len = (left_header & CONTAINER_HEADER_LEN_MASK) as usize; + let right_len = (right_header & CONTAINER_HEADER_LEN_MASK) as usize; + + let left_type = left_header & CONTAINER_HEADER_TYPE_MASK; + let right_type = right_header & CONTAINER_HEADER_TYPE_MASK; + + match (left_type, right_type) { + (OBJECT_CONTAINER_TAG, OBJECT_CONTAINER_TAG) => { + let mut builder = ObjectBuilder::new(); + for (key, jentry, item) in iterate_object_entries(left, left_header) { + builder.push_raw(key, jentry, item); + } + for (key, jentry, item) in iterate_object_entries(right, right_header) { + builder.push_raw(key, jentry, item); + } + builder.build_into(buf); + } + (ARRAY_CONTAINER_TAG, ARRAY_CONTAINER_TAG) => { + let mut builder = ArrayBuilder::new(left_len + right_len); + for (jentry, item) in iterate_array(left, left_header) { + builder.push_raw(jentry, item); + } + for (jentry, item) in iterate_array(right, right_header) { + builder.push_raw(jentry, item); + } + builder.build_into(buf); + } + (_, ARRAY_CONTAINER_TAG) => { + let mut builder = ArrayBuilder::new(right_len + 1); + match left_type { + OBJECT_CONTAINER_TAG => { + let jentry = JEntry::make_container_jentry(left_len); + builder.push_raw(jentry, left); + } + _ => { + let jentry = JEntry::decode_jentry(read_u32(left, 4)?); + builder.push_raw(jentry, &left[8..]); + } + }; + for (jentry, item) in iterate_array(right, right_header) { + builder.push_raw(jentry, item); + } + builder.build_into(buf); + } + (ARRAY_CONTAINER_TAG, _) => { + let mut builder = ArrayBuilder::new(left_len + 1); + for (jentry, item) in iterate_array(left, left_header) { + builder.push_raw(jentry, item); + } + match right_type { + OBJECT_CONTAINER_TAG => { + let jentry = JEntry::make_container_jentry(right_len); + builder.push_raw(jentry, right); + } + _ => { + let jentry = JEntry::decode_jentry(read_u32(right, 4)?); + builder.push_raw(jentry, &right[8..]); + } + }; + builder.build_into(buf); + } + (_, _) => { + let mut builder = ArrayBuilder::new(2); + match left_type { + OBJECT_CONTAINER_TAG => { + let jentry = JEntry::make_container_jentry(left_len); + builder.push_raw(jentry, left); + } + _ => { + let jentry = JEntry::decode_jentry(read_u32(left, 4)?); + builder.push_raw(jentry, &left[8..]); + } + }; + match right_type { + OBJECT_CONTAINER_TAG => { + let jentry = JEntry::make_container_jentry(right_len); + builder.push_raw(jentry, right); + } + _ => { + let jentry = JEntry::decode_jentry(read_u32(right, 4)?); + builder.push_raw(jentry, &right[8..]); + } + }; + builder.build_into(buf); + } + } + Ok(()) +} + /// Deletes all object fields that have null values from the given JSON value, recursively. /// Null values that are not object fields are untouched. pub fn strip_nulls(value: &[u8], buf: &mut Vec) -> Result<(), Error> { - let mut json = from_slice(value)?; - strip_value_nulls(&mut json); - json.write_to_vec(buf); - Ok(()) + if !is_jsonb(value) { + let mut json = parse_value(value)?; + strip_value_nulls(&mut json); + json.write_to_vec(buf); + return Ok(()); + } + strip_nulls_jsonb(value, buf) } fn strip_value_nulls(val: &mut Value<'_>) { @@ -1902,6 +2044,70 @@ fn strip_value_nulls(val: &mut Value<'_>) { } } +fn strip_nulls_jsonb(value: &[u8], buf: &mut Vec) -> Result<(), Error> { + let header = read_u32(value, 0)?; + + match header & CONTAINER_HEADER_TYPE_MASK { + OBJECT_CONTAINER_TAG => { + let builder = strip_nulls_object(header, value)?; + builder.build_into(buf); + } + ARRAY_CONTAINER_TAG => { + let builder = strip_nulls_array(header, value)?; + builder.build_into(buf); + } + _ => buf.extend_from_slice(value), + } + Ok(()) +} + +fn strip_nulls_array(header: u32, value: &[u8]) -> Result, Error> { + let len = (header & CONTAINER_HEADER_LEN_MASK) as usize; + let mut builder = ArrayBuilder::new(len); + + for (jentry, item) in iterate_array(value, header) { + match jentry.type_code { + CONTAINER_TAG => { + let item_header = read_u32(item, 0).unwrap(); + match item_header & CONTAINER_HEADER_TYPE_MASK { + OBJECT_CONTAINER_TAG => { + builder.push_object(strip_nulls_object(item_header, item)?); + } + ARRAY_CONTAINER_TAG => { + builder.push_array(strip_nulls_array(item_header, item)?); + } + _ => unreachable!(), + } + } + _ => builder.push_raw(jentry, item), + } + } + Ok(builder) +} + +fn strip_nulls_object(header: u32, value: &[u8]) -> Result, Error> { + let mut builder = ObjectBuilder::new(); + for (key, jentry, item) in iterate_object_entries(value, header) { + match jentry.type_code { + CONTAINER_TAG => { + let item_header = read_u32(item, 0).unwrap(); + match item_header & CONTAINER_HEADER_TYPE_MASK { + OBJECT_CONTAINER_TAG => { + builder.push_object(key, strip_nulls_object(item_header, item)?); + } + ARRAY_CONTAINER_TAG => { + builder.push_array(key, strip_nulls_array(item_header, item)?); + } + _ => unreachable!(), + } + } + NULL_TAG => continue, + _ => builder.push_raw(key, jentry, item), + } + } + Ok(builder) +} + /// Returns the type of the top-level JSON value as a text string. /// Possible types are object, array, string, number, boolean, and null. pub fn type_of(value: &[u8]) -> Result<&'static str, Error> { @@ -1971,159 +2177,3 @@ fn array_contains(arr: &[u8], arr_header: u32, val: &[u8], val_jentry: JEntry) - } false } - -fn iterate_array(value: &[u8], header: u32) -> ArrayIterator<'_> { - let length = (header & CONTAINER_HEADER_LEN_MASK) as usize; - ArrayIterator { - value, - jentry_offset: 4, - val_offset: 4 * length + 4, - length, - idx: 0, - } -} - -fn iteate_object_keys(value: &[u8], header: u32) -> ObjectKeyIterator<'_> { - let length = (header & CONTAINER_HEADER_LEN_MASK) as usize; - ObjectKeyIterator { - value, - jentry_offset: 4, - key_offset: 8 * length + 4, - length, - idx: 0, - } -} - -fn iterate_object_entries(value: &[u8], header: u32) -> ObjectEntryIterator<'_> { - let length = (header & CONTAINER_HEADER_LEN_MASK) as usize; - ObjectEntryIterator { - value, - jentry_offset: 4, - key_offset: 4 + length * 8, - val_offset: 4 + length * 8, - length, - keys: None, - } -} - -struct ArrayIterator<'a> { - value: &'a [u8], - jentry_offset: usize, - val_offset: usize, - length: usize, - idx: usize, -} - -impl<'a> Iterator for ArrayIterator<'a> { - type Item = (JEntry, &'a [u8]); - - fn next(&mut self) -> Option { - if self.idx >= self.length { - return None; - } - let encoded = read_u32(self.value, self.jentry_offset).unwrap(); - let jentry = JEntry::decode_jentry(encoded); - let val_length = jentry.length as usize; - - let item = ( - jentry, - &self.value[self.val_offset..self.val_offset + val_length], - ); - - self.idx += 1; - self.val_offset += val_length; - self.jentry_offset += 4; - - Some(item) - } -} - -struct ObjectKeyIterator<'a> { - value: &'a [u8], - jentry_offset: usize, - key_offset: usize, - length: usize, - idx: usize, -} - -impl<'a> Iterator for ObjectKeyIterator<'a> { - type Item = &'a str; - - fn next(&mut self) -> Option { - if self.idx >= self.length { - return None; - } - - let encoded = read_u32(self.value, self.jentry_offset).unwrap(); - let jentry = JEntry::decode_jentry(encoded); - let key_length = jentry.length as usize; - - let key = unsafe { - from_utf8_unchecked(&self.value[self.key_offset..self.key_offset + key_length]) - }; - - self.idx += 1; - self.key_offset += key_length; - self.jentry_offset += 4; - - Some(key) - } -} - -struct ObjectEntryIterator<'a> { - value: &'a [u8], - jentry_offset: usize, - key_offset: usize, - val_offset: usize, - length: usize, - keys: Option>, -} - -impl<'a> Iterator for ObjectEntryIterator<'a> { - type Item = (&'a str, JEntry, &'a [u8]); - - fn next(&mut self) -> Option { - if self.keys.is_none() { - self.fill_keys(); - } - match self.keys.as_mut().unwrap().pop_front() { - Some(key_jentry) => { - let prev_key_offset = self.key_offset; - self.key_offset += key_jentry.length as usize; - - let key = unsafe { - std::str::from_utf8_unchecked(&self.value[prev_key_offset..self.key_offset]) - }; - - let val_encoded = read_u32(self.value, self.jentry_offset).unwrap(); - let val_jentry = JEntry::decode_jentry(val_encoded); - let val_length = val_jentry.length as usize; - - let val = - &self.value[self.val_offset..self.val_offset + val_jentry.length as usize]; - let result = (key, val_jentry, val); - - self.jentry_offset += 4; - self.val_offset += val_length; - - Some(result) - } - None => None, - } - } -} - -impl<'a> ObjectEntryIterator<'a> { - fn fill_keys(&mut self) { - let mut keys: VecDeque = VecDeque::with_capacity(self.length); - for _ in 0..self.length { - let encoded = read_u32(self.value, self.jentry_offset).unwrap(); - let key_jentry = JEntry::decode_jentry(encoded); - - self.jentry_offset += 4; - self.val_offset += key_jentry.length as usize; - keys.push_back(key_jentry); - } - self.keys = Some(keys); - } -} diff --git a/src/iterator.rs b/src/iterator.rs new file mode 100644 index 0000000..54d6711 --- /dev/null +++ b/src/iterator.rs @@ -0,0 +1,182 @@ +// Copyright 2024 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::{collections::VecDeque, str::from_utf8_unchecked}; + +use crate::{constants::CONTAINER_HEADER_LEN_MASK, jentry::JEntry, Error}; + +pub(crate) fn iterate_array(value: &[u8], header: u32) -> ArrayIterator<'_> { + let length = (header & CONTAINER_HEADER_LEN_MASK) as usize; + ArrayIterator { + value, + jentry_offset: 4, + val_offset: 4 * length + 4, + length, + idx: 0, + } +} + +pub(crate) fn iteate_object_keys(value: &[u8], header: u32) -> ObjectKeyIterator<'_> { + let length = (header & CONTAINER_HEADER_LEN_MASK) as usize; + ObjectKeyIterator { + value, + jentry_offset: 4, + key_offset: 8 * length + 4, + length, + idx: 0, + } +} + +pub(crate) fn iterate_object_entries(value: &[u8], header: u32) -> ObjectEntryIterator<'_> { + let length = (header & CONTAINER_HEADER_LEN_MASK) as usize; + ObjectEntryIterator { + value, + jentry_offset: 4, + key_offset: 4 + length * 8, + val_offset: 4 + length * 8, + length, + keys: None, + } +} + +pub(crate) struct ArrayIterator<'a> { + value: &'a [u8], + jentry_offset: usize, + val_offset: usize, + length: usize, + idx: usize, +} + +impl<'a> Iterator for ArrayIterator<'a> { + type Item = (JEntry, &'a [u8]); + + fn next(&mut self) -> Option { + if self.idx >= self.length { + return None; + } + let encoded = read_u32(self.value, self.jentry_offset).unwrap(); + let jentry = JEntry::decode_jentry(encoded); + let val_length = jentry.length as usize; + + let item = ( + jentry, + &self.value[self.val_offset..self.val_offset + val_length], + ); + + self.idx += 1; + self.val_offset += val_length; + self.jentry_offset += 4; + + Some(item) + } +} + +pub(crate) struct ObjectKeyIterator<'a> { + value: &'a [u8], + jentry_offset: usize, + key_offset: usize, + length: usize, + idx: usize, +} + +impl<'a> Iterator for ObjectKeyIterator<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option { + if self.idx >= self.length { + return None; + } + + let encoded = read_u32(self.value, self.jentry_offset).unwrap(); + let jentry = JEntry::decode_jentry(encoded); + let key_length = jentry.length as usize; + + let key = unsafe { + from_utf8_unchecked(&self.value[self.key_offset..self.key_offset + key_length]) + }; + + self.idx += 1; + self.key_offset += key_length; + self.jentry_offset += 4; + + Some(key) + } +} + +pub(crate) struct ObjectEntryIterator<'a> { + value: &'a [u8], + jentry_offset: usize, + key_offset: usize, + val_offset: usize, + length: usize, + keys: Option>, +} + +impl<'a> Iterator for ObjectEntryIterator<'a> { + type Item = (&'a str, JEntry, &'a [u8]); + + fn next(&mut self) -> Option { + if self.keys.is_none() { + self.fill_keys(); + } + match self.keys.as_mut().unwrap().pop_front() { + Some(key_jentry) => { + let prev_key_offset = self.key_offset; + self.key_offset += key_jentry.length as usize; + + let key = unsafe { + std::str::from_utf8_unchecked(&self.value[prev_key_offset..self.key_offset]) + }; + + let val_encoded = read_u32(self.value, self.jentry_offset).unwrap(); + let val_jentry = JEntry::decode_jentry(val_encoded); + let val_length = val_jentry.length as usize; + + let val = + &self.value[self.val_offset..self.val_offset + val_jentry.length as usize]; + let result = (key, val_jentry, val); + + self.jentry_offset += 4; + self.val_offset += val_length; + + Some(result) + } + None => None, + } + } +} + +impl<'a> ObjectEntryIterator<'a> { + fn fill_keys(&mut self) { + let mut keys: VecDeque = VecDeque::with_capacity(self.length); + for _ in 0..self.length { + let encoded = read_u32(self.value, self.jentry_offset).unwrap(); + let key_jentry = JEntry::decode_jentry(encoded); + + self.jentry_offset += 4; + self.val_offset += key_jentry.length as usize; + keys.push_back(key_jentry); + } + self.keys = Some(keys); + } +} + +fn read_u32(buf: &[u8], idx: usize) -> Result { + let bytes: [u8; 4] = buf + .get(idx..idx + 4) + .ok_or(Error::InvalidEOF)? + .try_into() + .unwrap(); + Ok(u32::from_be_bytes(bytes)) +} diff --git a/src/lib.rs b/src/lib.rs index 79e4638..4e463a3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -63,11 +63,13 @@ #![allow(clippy::uninlined_format_args)] +mod builder; mod constants; mod de; mod error; mod from; mod functions; +mod iterator; mod jentry; pub mod jsonpath; pub mod keypath; diff --git a/tests/it/functions.rs b/tests/it/functions.rs index 31e734a..1e95635 100644 --- a/tests/it/functions.rs +++ b/tests/it/functions.rs @@ -16,14 +16,13 @@ use std::borrow::Cow; use std::cmp::Ordering; use std::collections::BTreeMap; -use jsonb::path_match; use jsonb::{ array_length, array_values, as_bool, as_null, as_number, as_str, build_array, build_object, - compare, contains, convert_to_comparable, exists_all_keys, exists_any_keys, from_slice, + compare, concat, contains, convert_to_comparable, exists_all_keys, exists_any_keys, from_slice, get_by_index, get_by_keypath, get_by_name, get_by_path, is_array, is_object, - keypath::parse_key_paths, object_each, object_keys, parse_value, path_exists, strip_nulls, - to_bool, to_f64, to_i64, to_pretty_string, to_str, to_string, to_u64, traverse_check_string, - type_of, Number, Object, Value, + keypath::parse_key_paths, object_each, object_keys, parse_value, path_exists, path_match, + strip_nulls, to_bool, to_f64, to_i64, to_pretty_string, to_str, to_string, to_u64, + traverse_check_string, type_of, Number, Object, Value, }; use jsonb::jsonpath::parse_json_path; @@ -1224,6 +1223,67 @@ fn test_path_match() { } } +#[test] +fn test_concat() { + let sources = vec![ + ("null", "null", "[null,null]"), + ("true", "null", "[true,null]"), + ("1", r#""asdasd""#, r#"[1,"asdasd"]"#), + (r#""asd""#, r#"[1,2,3]"#, r#"["asd",1,2,3]"#), + (r#"[1,2,3]"#, r#""asd""#, r#"[1,2,3,"asd"]"#), + ( + r#"[1,{"a":1,"b":2,"c":[1,2,3]},3]"#, + r#""asd""#, + r#"[1,{"a":1,"b":2,"c":[1,2,3]},3,"asd"]"#, + ), + ( + r#"[1,{"a":1,"b":2,"c":[1,2,3]},3]"#, + r#"[10,20,30]"#, + r#"[1,{"a":1,"b":2,"c":[1,2,3]},3,10,20,30]"#, + ), + ( + r#"[1,[1,2,3],3]"#, + r#"[[10,20,30]]"#, + r#"[1,[1,2,3],3,[10,20,30]]"#, + ), + (r#"{"a":1,"b":2}"#, r#"true"#, r#"[{"a":1,"b":2},true]"#), + (r#"[1,2,3]"#, r#"{"a":1,"b":2}"#, r#"[1,2,3,{"a":1,"b":2}]"#), + (r#"{"a":1,"b":2}"#, r#"[1,2,3]"#, r#"[{"a":1,"b":2},1,2,3]"#), + ( + r#"{"a":1,"b":2}"#, + r#"{"c":3,"d":4}"#, + r#"{"a":1,"b":2,"c":3,"d":4}"#, + ), + ( + r#"{"a":1,"b":2,"d":10}"#, + r#"{"a":3,"b":4}"#, + r#"{"a":3,"b":4,"d":10}"#, + ), + ]; + for (left, right, result) in sources { + { + let mut buf = Vec::new(); + + concat(left.as_bytes(), right.as_bytes(), &mut buf).unwrap(); + + let actual = from_slice(&buf).unwrap(); + let expected = parse_value(result.as_bytes()).unwrap(); + assert_eq!(actual, expected); + } + { + let mut buf = Vec::new(); + let left_json = parse_value(left.as_bytes()).unwrap().to_vec(); + let right_json = parse_value(right.as_bytes()).unwrap().to_vec(); + + concat(&left_json, &right_json, &mut buf).unwrap(); + + let actual = from_slice(&buf).unwrap(); + let expected = parse_value(result.as_bytes()).unwrap(); + assert_eq!(actual, expected); + } + } +} + fn init_object<'a>(entries: Vec<(&str, Value<'a>)>) -> Value<'a> { let mut map = BTreeMap::new(); for (key, val) in entries {