Skip to content

Commit

Permalink
Merge pull request #41 from akoshchiy/11270-json-path-match
Browse files Browse the repository at this point in the history
feat: jsonpath predicate support
  • Loading branch information
b41sh authored Nov 29, 2023
2 parents 1d7a3e9 + 9b26a09 commit 582c139
Show file tree
Hide file tree
Showing 8 changed files with 309 additions and 22 deletions.
1 change: 1 addition & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ pub enum Error {
InvalidJsonbJEntry,

InvalidJsonPath,
InvalidJsonPathPredicate,
InvalidKeyPath,

Syntax(ParseErrorCode, usize),
Expand Down
11 changes: 11 additions & 0 deletions src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,17 @@ pub fn path_exists<'a>(value: &'a [u8], json_path: JsonPath<'a>) -> bool {
}
}

/// Evaluates a JSON path predicate against `value` and reports whether it holds.
///
/// `value` is handed to the selector directly when `is_jsonb` accepts it;
/// otherwise it is first run through `parse_value` and re-encoded with `to_vec`.
///
/// # Errors
///
/// Propagates the error from `parse_value` when the input cannot be parsed, and
/// returns `Error::InvalidJsonPathPredicate` (via `Selector::predicate_match`)
/// when `json_path` is not a predicate expression.
pub fn path_match<'a>(value: &'a [u8], json_path: JsonPath<'a>) -> Result<bool, Error> {
    let selector = Selector::new(json_path, Mode::First);
    if is_jsonb(value) {
        return selector.predicate_match(value);
    }
    // Not already encoded: parse the input and evaluate against the encoded form.
    let encoded = parse_value(value)?.to_vec();
    selector.predicate_match(&encoded)
}

/// Get the inner elements of `JSONB` value by JSON path.
/// The return value may contain multiple matching elements.
pub fn get_by_path<'a>(
Expand Down
60 changes: 40 additions & 20 deletions src/jsonpath/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use nom::{
branch::alt,
bytes::complete::{tag, tag_no_case},
character::complete::{char, i32, i64, multispace0, u64},
combinator::{map, opt, value},
combinator::{cond, map, map_res, opt, value},
error::{Error as NomError, ErrorKind},
multi::{many0, separated_list1},
number::complete::double,
Expand Down Expand Up @@ -46,9 +46,10 @@ pub fn parse_json_path(input: &[u8]) -> Result<JsonPath<'_>, Error> {
}

fn json_path(input: &[u8]) -> IResult<&[u8], JsonPath<'_>> {
map(delimited(multispace0, paths, multispace0), |paths| {
JsonPath { paths }
})(input)
map(
delimited(multispace0, predicate_or_paths, multispace0),
|paths| JsonPath { paths },
)(input)
}

fn check_escaped(input: &[u8], i: &mut usize) -> bool {
Expand Down Expand Up @@ -252,6 +253,17 @@ fn path(input: &[u8]) -> IResult<&[u8], Path<'_>> {
))(input)
}

/// Parses the input either as a standalone predicate or as an ordinary path list.
///
/// The predicate parser is attempted first; `paths` is only tried when
/// `predicate` fails with a recoverable error.
fn predicate_or_paths(input: &[u8]) -> IResult<&[u8], Vec<Path<'_>>> {
    let mut parser = alt((predicate, paths));
    parser(input)
}

/// Parses a standalone predicate expression, such as `$.a > 1`.
///
/// The expression is parsed by `expr_or` with `root_predicate = true`, with
/// optional whitespace consumed on both sides. On success the result is a
/// single-element path list holding the expression as `Path::Predicate`.
fn predicate(input: &[u8]) -> IResult<&[u8], Vec<Path<'_>>> {
    delimited(multispace0, |i| expr_or(i, true), multispace0)(input)
        .map(|(rest, expr)| (rest, vec![Path::Predicate(Box::new(expr))]))
}

fn paths(input: &[u8]) -> IResult<&[u8], Vec<Path<'_>>> {
map(
pair(opt(pre_path), many0(path)),
Expand All @@ -264,13 +276,17 @@ fn paths(input: &[u8]) -> IResult<&[u8], Vec<Path<'_>>> {
)(input)
}

fn expr_paths(input: &[u8]) -> IResult<&[u8], Vec<Path<'_>>> {
fn expr_paths(input: &[u8], root_predicate: bool) -> IResult<&[u8], Vec<Path<'_>>> {
let parse_current = map_res(
cond(!root_predicate, value(Path::Current, char('@'))),
|res| match res {
Some(v) => Ok(v),
None => Err(NomError::new(input, ErrorKind::Char)),
},
);
map(
pair(
alt((
value(Path::Root, char('$')),
value(Path::Current, char('@')),
)),
alt((value(Path::Root, char('$')), parse_current)),
many0(delimited(multispace0, inner_path, multispace0)),
),
|(pre_path, mut paths)| {
Expand All @@ -284,7 +300,7 @@ fn filter_expr(input: &[u8]) -> IResult<&[u8], Expr<'_>> {
map(
delimited(
delimited(char('?'), multispace0, char('(')),
delimited(multispace0, expr_or, multispace0),
delimited(multispace0, |i| expr_or(i, false), multispace0),
char(')'),
),
|v| v,
Expand Down Expand Up @@ -315,21 +331,21 @@ fn path_value(input: &[u8]) -> IResult<&[u8], PathValue<'_>> {
))(input)
}

fn inner_expr(input: &[u8]) -> IResult<&[u8], Expr<'_>> {
fn inner_expr(input: &[u8], root_predicate: bool) -> IResult<&[u8], Expr<'_>> {
alt((
map(expr_paths, Expr::Paths),
map(|i| expr_paths(i, root_predicate), Expr::Paths),
map(path_value, |v| Expr::Value(Box::new(v))),
))(input)
}

fn expr_atom(input: &[u8]) -> IResult<&[u8], Expr<'_>> {
fn expr_atom(input: &[u8], root_predicate: bool) -> IResult<&[u8], Expr<'_>> {
// TODO, support arithmetic expressions.
alt((
map(
tuple((
delimited(multispace0, inner_expr, multispace0),
delimited(multispace0, |i| inner_expr(i, root_predicate), multispace0),
op,
delimited(multispace0, inner_expr, multispace0),
delimited(multispace0, |i| inner_expr(i, root_predicate), multispace0),
)),
|(left, op, right)| Expr::BinaryOp {
op,
Expand All @@ -340,17 +356,19 @@ fn expr_atom(input: &[u8]) -> IResult<&[u8], Expr<'_>> {
map(
delimited(
terminated(char('('), multispace0),
expr_or,
|i| expr_or(i, root_predicate),
preceded(multispace0, char(')')),
),
|expr| expr,
),
))(input)
}

fn expr_and(input: &[u8]) -> IResult<&[u8], Expr<'_>> {
fn expr_and(input: &[u8], root_predicate: bool) -> IResult<&[u8], Expr<'_>> {
map(
separated_list1(delimited(multispace0, tag("&&"), multispace0), expr_atom),
separated_list1(delimited(multispace0, tag("&&"), multispace0), |i| {
expr_atom(i, root_predicate)
}),
|exprs| {
let mut expr = exprs[0].clone();
for right in exprs.iter().skip(1) {
Expand All @@ -365,9 +383,11 @@ fn expr_and(input: &[u8]) -> IResult<&[u8], Expr<'_>> {
)(input)
}

fn expr_or(input: &[u8]) -> IResult<&[u8], Expr<'_>> {
fn expr_or(input: &[u8], root_predicate: bool) -> IResult<&[u8], Expr<'_>> {
map(
separated_list1(delimited(multispace0, tag("||"), multispace0), expr_and),
separated_list1(delimited(multispace0, tag("||"), multispace0), |i| {
expr_and(i, root_predicate)
}),
|exprs| {
let mut expr = exprs[0].clone();
for right in exprs.iter().skip(1) {
Expand Down
11 changes: 11 additions & 0 deletions src/jsonpath/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ pub struct JsonPath<'a> {
pub paths: Vec<Path<'a>>,
}

impl<'a> JsonPath<'a> {
    /// Returns `true` when this path consists of exactly one element and that
    /// element is a `Path::Predicate`.
    pub fn is_predicate(&self) -> bool {
        matches!(self.paths.as_slice(), [Path::Predicate(_)])
    }
}

/// Represents a valid JSON Path.
#[derive(Debug, Clone, PartialEq)]
pub enum Path<'a> {
Expand Down Expand Up @@ -58,6 +64,8 @@ pub enum Path<'a> {
ArrayIndices(Vec<ArrayIndex>),
/// `?(<expression>)` represents selecting all elements in an object or array that match the filter expression, like `$.book[?(@.price < 10)]`.
FilterExpr(Box<Expr<'a>>),
/// `<expression>` represents a standalone filter expression (predicate), like `$.book[*].price > 10`.
Predicate(Box<Expr<'a>>),
}

/// Represents the single index in an Array.
Expand Down Expand Up @@ -210,6 +218,9 @@ impl<'a> Display for Path<'a> {
Path::FilterExpr(expr) => {
write!(f, "?({expr})")?;
}
Path::Predicate(expr) => {
write!(f, "{expr}")?;
}
}
Ok(())
}
Expand Down
34 changes: 32 additions & 2 deletions src/jsonpath/selector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ use crate::jsonpath::JsonPath;
use crate::jsonpath::Path;
use crate::jsonpath::PathValue;
use crate::number::Number;
use crate::Error;

use nom::{
bytes::complete::take, combinator::map, multi::count, number::complete::be_u32, IResult,
Expand Down Expand Up @@ -74,6 +75,12 @@ impl<'a> Selector<'a> {

pub fn select(&'a self, root: &'a [u8], data: &mut Vec<u8>, offsets: &mut Vec<u64>) {
let mut poses = self.find_positions(root);

if self.json_path.is_predicate() {
Self::build_predicate_result(&mut poses, data);
return;
}

match self.mode {
Mode::All => Self::build_values(root, &mut poses, data, offsets),
Mode::First => {
Expand All @@ -92,10 +99,21 @@ impl<'a> Selector<'a> {
}

/// Reports whether the JSON path selects at least one position in `root`.
///
/// A predicate path always reports `true`; in that case `root` is not
/// searched at all.
pub fn exists(&'a self, root: &'a [u8]) -> bool {
    self.json_path.is_predicate() || !self.find_positions(root).is_empty()
}

pub fn predicate_match(&'a self, root: &'a [u8]) -> Result<bool, Error> {
if !self.json_path.is_predicate() {
return Err(Error::InvalidJsonPathPredicate);
}
let poses = self.find_positions(root);
Ok(!poses.is_empty())
}

fn find_positions(&'a self, root: &'a [u8]) -> VecDeque<Position> {
let mut poses = VecDeque::new();
poses.push_back(Position::Container((0, root.len())));
Expand All @@ -106,7 +124,7 @@ impl<'a> Selector<'a> {
continue;
}
&Path::Current => unreachable!(),
Path::FilterExpr(expr) => {
Path::FilterExpr(expr) | Path::Predicate(expr) => {
let len = poses.len();
for _ in 0..len {
let pos = poses.pop_front().unwrap();
Expand Down Expand Up @@ -313,6 +331,15 @@ impl<'a> Selector<'a> {
}
}

/// Appends the predicate outcome to `data` as two big-endian `u32` words:
/// `SCALAR_CONTAINER_TAG` followed by `TRUE_TAG` or `FALSE_TAG`.
///
/// The outcome is `TRUE_TAG` when `poses` holds at least one position (the
/// front position is popped in the process) and `FALSE_TAG` when it is empty.
fn build_predicate_result(poses: &mut VecDeque<Position>, data: &mut Vec<u8>) {
    let outcome = if poses.pop_front().is_some() {
        TRUE_TAG
    } else {
        FALSE_TAG
    };
    for word in [SCALAR_CONTAINER_TAG, outcome] {
        data.write_u32::<BigEndian>(word).unwrap();
    }
}

fn build_values(
root: &'a [u8],
poses: &mut VecDeque<Position>,
Expand Down Expand Up @@ -444,7 +471,10 @@ impl<'a> Selector<'a> {

for path in paths.iter().skip(1) {
match path {
&Path::Root | &Path::Current | &Path::FilterExpr(_) => unreachable!(),
&Path::Root
| &Path::Current
| &Path::FilterExpr(_)
| &Path::Predicate(_) => unreachable!(),
_ => {
let len = poses.len();
for _ in 0..len {
Expand Down
39 changes: 39 additions & 0 deletions tests/it/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use std::borrow::Cow;
use std::cmp::Ordering;
use std::collections::BTreeMap;

use jsonb::path_match;
use jsonb::{
array_length, array_values, as_bool, as_null, as_number, as_str, build_array, build_object,
compare, contains, convert_to_comparable, exists_all_keys, exists_any_keys, from_slice,
Expand Down Expand Up @@ -153,6 +154,9 @@ fn test_path_exists() {
r#"$.b[1 to last] ? (@ >=2 && @ <=3)"#,
true,
),
// predicates always return true in path_exists.
(r#"{"a":1,"b":[1,2,3]}"#, r#"$.b[1 to last] > 10"#, true),
(r#"{"a":1,"b":[1,2,3]}"#, r#"$.b[1 to last] > 1"#, true),
];
for (json, path, expect) in sources {
// Check from JSONB
Expand Down Expand Up @@ -224,6 +228,10 @@ fn test_get_by_path() {
),
(r#"$.car_no"#, vec![r#"123"#]),
(r#"$.测试\"\uD83D\uDC8E"#, vec![r#""ab""#]),
// predicates return the result of the filter expression.
(r#"$.phones[0 to last].number == 3720453"#, vec!["true"]),
(r#"$.phones[0 to last].type == "workk""#, vec!["false"]),
(r#"$.name == "Fred" && $.car_no == 123"#, vec!["true"]),
];

let mut buf: Vec<u8> = Vec::new();
Expand Down Expand Up @@ -1185,6 +1193,37 @@ fn test_contains() {
}
}

#[test]
fn test_path_match() {
    // Each case is (JSON document, predicate expression, expected outcome).
    let cases = vec![
        (r#"{"a":1,"b":2}"#, r#"$.a == 1"#, true),
        (r#"{"a":1,"b":2}"#, r#"$.a > 1"#, false),
        (r#"{"a":1,"b":2}"#, r#"$.c > 0"#, false),
        (r#"{"a":1,"b":2}"#, r#"$.b < 2"#, false),
        (r#"{"a":1,"b":[1,2,3]}"#, r#"$.b[0] == 1"#, true),
        (r#"{"a":1,"b":[1,2,3]}"#, r#"$.b[0] > 1"#, false),
        (r#"{"a":1,"b":[1,2,3]}"#, r#"$.b[3] == 0"#, false),
        (r#"{"a":1,"b":[1,2,3]}"#, r#"$.b[1 to last] >= 2"#, true),
        (
            r#"{"a":1,"b":[1,2,3]}"#,
            r#"$.b[1 to last] == 2 || $.b[1 to last] == 3"#,
            true,
        ),
    ];
    for (doc, predicate, expected) in cases {
        let json_path = parse_json_path(predicate.as_bytes()).unwrap();

        // Evaluate against the raw JSON text.
        let from_text = path_match(doc.as_bytes(), json_path.clone()).unwrap();
        assert_eq!(from_text, expected);

        // Evaluate against the encoded form of the same document.
        let encoded = parse_value(doc.as_bytes()).unwrap().to_vec();
        let from_encoded = path_match(&encoded, json_path).unwrap();
        assert_eq!(from_encoded, expected);
    }
}

fn init_object<'a>(entries: Vec<(&str, Value<'a>)>) -> Value<'a> {
let mut map = BTreeMap::new();
for (key, val) in entries {
Expand Down
7 changes: 7 additions & 0 deletions tests/it/jsonpath_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ fn test_json_path() {
r#"["k1"]["k2"]"#,
r#"k1.k2:k3"#,
r#"k1["k2"][1]"#,
// predicates
r#"$ > 1"#,
r#"$.* == 0"#,
r#"$[*] > 1"#,
r#"$.a > $.b"#,
r#"$.price > 10 || $.category == "reference""#,
];

for case in cases {
Expand Down Expand Up @@ -74,6 +80,7 @@ fn test_json_path_error() {
r#"$['1','2',]"#,
r#"$['1', ,'3']"#,
r#"$['aaa'}'bbb']"#,
r#"@ > 10"#,
];

for case in cases {
Expand Down
Loading

0 comments on commit 582c139

Please sign in to comment.