Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DRAFT] AVX512 implementation for classification/structural #520

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Justfile
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ build-bin profile="dev": (build-lib profile)
build-lib profile="dev":
cargo build --package rsonpath-lib --profile {{profile}}

# Build the library with the nightly toolchain (used for the AVX512 work),
# honouring the requested cargo profile.
build-avx512 profile="dev":
    rustup run nightly cargo build --package rsonpath-lib --profile {{profile}}

# Build all rsonpath parts, the binary and library.
build-all profile="dev": (build-lib profile) (build-bin profile) (gen-tests)

Expand Down
1 change: 1 addition & 0 deletions crates/rsonpath-lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ default = ["simd"]
arbitrary = ["dep:arbitrary"]
simd = []


[[example]]
name = "approx_spans_usage"
path = "examples/approx_spans_usage.rs"
Expand Down
2 changes: 2 additions & 0 deletions crates/rsonpath-lib/src/classification/depth.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ pub(crate) mod shared;
pub(crate) mod avx2_32;
#[cfg(target_arch = "x86_64")]
pub(crate) mod avx2_64;
#[cfg(target_arch = "x86_64")]
pub(crate) mod avx512_64;
#[cfg(target_arch = "x86")]
pub(crate) mod sse2_32;
#[cfg(target_arch = "x86_64")]
Expand Down
56 changes: 56 additions & 0 deletions crates/rsonpath-lib/src/classification/depth/avx512_64.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
use super::{
shared::{mask_64::DepthVector64, vector_512::DelimiterClassifierImpl512},
*,
};
use crate::{
classification::{QuoteClassifiedBlock, ResumeClassifierBlockState},
debug,
input::InputBlock,
};
use std::marker::PhantomData;

/// Block size used as the `SIZE` const-generic argument of `InputBlock` and
/// `QuoteClassifiedBlock` throughout this module.
const SIZE: usize = 64;

// NOTE(review): the macro body is not visible from this file; it presumably generates the
// `Avx512VectorIterator64` type from the classifier and vector types passed here — confirm
// against `shared::depth_classifier!`.
shared::depth_classifier!(Avx512VectorIterator64, DelimiterClassifierImpl512, DepthVector64, 64, u64);

/// Builds the depth vector for an entire quote-classified block.
///
/// Delegates to `new_vector_from` with a start index of zero, so no leading
/// positions of the block are masked out.
#[inline(always)]
fn new_vector<'a, B>(
    bytes: QuoteClassifiedBlock<B, u64, SIZE>,
    classifier: &DelimiterClassifierImpl512,
) -> DepthVector64<'a, B>
where
    B: InputBlock<'a, SIZE>,
{
    const BLOCK_START: usize = 0;

    new_vector_from(bytes, classifier, BLOCK_START)
}

/// Builds the depth vector for a quote-classified block, starting at position `idx`.
///
/// This is a safe wrapper that forwards all of its arguments to `new_avx512`.
#[inline(always)]
fn new_vector_from<'a, B>(
    bytes: QuoteClassifiedBlock<B, u64, SIZE>,
    classifier: &DelimiterClassifierImpl512,
    idx: usize,
) -> DepthVector64<'a, B>
where
    B: InputBlock<'a, SIZE>,
{
    // SAFETY: target_feature invariant
    // NOTE(review): presumably this module is only selected after AVX512 support
    // has been established — confirm at the dispatch site.
    unsafe { new_avx512(bytes, classifier, idx) }
}

/// Classifies the delimiters of one block and packs the result into a `DepthVector64`.
///
/// The opening and closing masks are obtained from `classifier`; every bit that is
/// flagged in `bytes.within_quotes_mask` or lies below `start_idx` is cleared.
/// The returned vector starts with `depth` and `idx` both set to zero.
///
/// # Safety
///
/// Calls `DelimiterClassifierImpl512::get_opening_and_closing_masks`, which is compiled
/// with the `avx512f` and `avx512bw` target features enabled, so those features must be
/// available on the executing CPU.
#[inline(always)]
unsafe fn new_avx512<'a, B>(
    bytes: QuoteClassifiedBlock<B, u64, SIZE>,
    classifier: &DelimiterClassifierImpl512,
    start_idx: usize,
) -> DepthVector64<'a, B>
where
    B: InputBlock<'a, SIZE>,
{
    // Positions we care about: at or after `start_idx`, and not inside quotes.
    let keep_mask = (u64::MAX << start_idx) & !bytes.within_quotes_mask;
    let (raw_opening, raw_closing) = classifier.get_opening_and_closing_masks(&bytes.block);

    let opening_mask = raw_opening & keep_mask;
    let closing_mask = raw_closing & keep_mask;
    let opening_count = opening_mask.count_ones();

    DepthVector64 {
        quote_classified: bytes,
        opening_mask,
        closing_mask,
        opening_count,
        depth: 0,
        idx: 0,
        phantom: PhantomData,
    }
}
2 changes: 2 additions & 0 deletions crates/rsonpath-lib/src/classification/depth/shared.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ pub(super) mod mask_64;
pub(super) mod vector_128;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub(super) mod vector_256;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub(super) mod vector_512;

#[allow(unused_macros)]
macro_rules! depth_classifier {
Expand Down
46 changes: 46 additions & 0 deletions crates/rsonpath-lib/src/classification/depth/shared/vector_512.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
use crate::classification::structural::BracketType;

#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

/// Delimiter classifier working on 512-bit vectors.
///
/// Only the opening bracket byte is stored; the closing bracket is derived from it.
pub(crate) struct DelimiterClassifierImpl512 {
    opening: i8,
}

impl DelimiterClassifierImpl512 {
    /// Creates a classifier for the given bracket type.
    pub(crate) fn new(opening: BracketType) -> Self {
        let delimiter: u8 = match opening {
            BracketType::Square => b'[',
            BracketType::Curly => b'{',
        };

        Self {
            opening: delimiter as i8,
        }
    }

    /// Vector with every byte lane set to the opening delimiter.
    #[inline(always)]
    unsafe fn opening_mask(&self) -> __m512i {
        _mm512_set1_epi8(self.opening)
    }

    /// Vector with every byte lane set to the closing delimiter.
    ///
    /// In ASCII, `]` and `}` are each exactly two code points after `[` and `{`.
    #[inline(always)]
    unsafe fn closing_mask(&self) -> __m512i {
        let closing = self.opening + 2;
        _mm512_set1_epi8(closing)
    }

    /// Returns the `(opening, closing)` bitmasks for `bytes`, obtained by comparing
    /// every byte for equality with the respective delimiter.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` is not exactly 64 bytes long.
    ///
    /// # Safety
    ///
    /// Compiled with the `avx512f` and `avx512bw` target features enabled; the caller
    /// must make sure the executing CPU supports them.
    #[target_feature(enable = "avx512f")]
    #[target_feature(enable = "avx512bw")]
    #[inline]
    pub(crate) unsafe fn get_opening_and_closing_masks(&self, bytes: &[u8]) -> (u64, u64) {
        assert_eq!(64, bytes.len());
        // SAFETY: target_feature invariant
        unsafe {
            let haystack = _mm512_loadu_si512(bytes.as_ptr().cast::<i32>());
            let opening_needle = self.opening_mask();
            let closing_needle = self.closing_mask();

            let openings = _mm512_cmpeq_epi8_mask(haystack, opening_needle);
            let closings = _mm512_cmpeq_epi8_mask(haystack, closing_needle);

            (openings, closings)
        }
    }
}
27 changes: 27 additions & 0 deletions crates/rsonpath-lib/src/classification/mask.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,16 @@ pub(crate) trait Mask {
Self: Shl<N, Output = Self>;
}

impl Mask for u128 {
    /// Returns `true` if the bit at position `bit` is set in this mask.
    #[inline(always)]
    fn is_lit<N>(&self, bit: N) -> bool
    where
        Self: Shl<N, Output = Self>,
    {
        // Single-bit selector for the requested position.
        let selector = 1_u128 << bit;
        (*self & selector) != 0
    }
}

impl Mask for u64 {
#[inline(always)]
fn is_lit<N>(&self, bit: N) -> bool
Expand Down Expand Up @@ -53,3 +63,20 @@ pub(crate) mod m64 {
u64::from(m1) | (u64::from(m2) << 32)
}
}

/// Helpers for assembling a 128-bit mask out of narrower masks.
///
/// In every function the first argument ends up in the lowest bits of the result
/// and the last argument in the highest bits.
#[allow(dead_code)]
pub(crate) mod m128 {
    /// Packs eight 16-bit masks into one `u128`.
    pub(crate) fn combine_16(
        m1: u16,
        m2: u16,
        m3: u16,
        m4: u16,
        m5: u16,
        m6: u16,
        m7: u16,
        m8: u16,
    ) -> u128 {
        [m1, m2, m3, m4, m5, m6, m7, m8]
            .iter()
            .enumerate()
            .fold(0_u128, |acc, (i, &part)| acc | (u128::from(part) << (16 * i)))
    }

    /// Packs four 32-bit masks into one `u128`.
    pub(crate) fn combine_32(m1: u32, m2: u32, m3: u32, m4: u32) -> u128 {
        [m1, m2, m3, m4]
            .iter()
            .enumerate()
            .fold(0_u128, |acc, (i, &part)| acc | (u128::from(part) << (32 * i)))
    }

    /// Packs two 64-bit masks into one `u128`.
    pub(crate) fn combine_64(m1: u64, m2: u64) -> u128 {
        let low = u128::from(m1);
        let high = u128::from(m2) << 64;
        low | high
    }
}
2 changes: 2 additions & 0 deletions crates/rsonpath-lib/src/classification/memmem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ pub(crate) mod shared;
pub(crate) mod avx2_32;
#[cfg(target_arch = "x86_64")]
pub(crate) mod avx2_64;
#[cfg(target_arch = "x86_64")]
pub(crate) mod avx512_64;
#[cfg(target_arch = "x86")]
pub(crate) mod sse2_32;
#[cfg(target_arch = "x86_64")]
Expand Down
Loading