Skip to content

Commit

Permalink
fix fingerprints
Browse files (browse the repository at this point in the history)
  • Loading branch information
arthurprs committed Jul 22, 2024
1 parent 8d91322 commit 9ecb85e
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 25 deletions.
14 changes: 7 additions & 7 deletions benches/benches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ use test::Bencher;

#[bench]
fn bench_new(b: &mut Bencher) {
b.iter(|| Filter::new(1000, 0.005));
b.iter(|| Filter::new(1000, 0.005).unwrap());
}
#[bench]
fn bench_get_ok_medium(b: &mut Bencher) {
let mut f = Filter::new(100000, 0.01);
let mut f = Filter::new(100000, 0.01).unwrap();
for i in 0..f.capacity() {
f.insert_duplicated(&i).unwrap();
}
Expand All @@ -23,7 +23,7 @@ fn bench_get_ok_medium(b: &mut Bencher) {

#[bench]
fn bench_get_nok_medium(b: &mut Bencher) {
let mut f = Filter::new(100000, 0.01);
let mut f = Filter::new(100000, 0.01).unwrap();
for i in 0..f.capacity() {
f.insert_duplicated(&i).unwrap();
}
Expand All @@ -37,7 +37,7 @@ fn bench_get_nok_medium(b: &mut Bencher) {
#[bench]
fn bench_grow(b: &mut Bencher) {
b.iter(|| {
let mut f = Filter::new(10000, 0.01);
let mut f = Filter::new(10000, 0.01).unwrap();
for i in 0..f.capacity() {
f.insert_duplicated(i).unwrap();
}
Expand All @@ -47,7 +47,7 @@ fn bench_grow(b: &mut Bencher) {

#[bench]
fn bench_grow_from_90pct(b: &mut Bencher) {
let mut f = Filter::new(10000, 0.01);
let mut f = Filter::new(10000, 0.01).unwrap();
for i in 0..f.capacity() / 10 * 9 {
f.insert_duplicated(i).unwrap();
}
Expand All @@ -63,7 +63,7 @@ fn bench_grow_from_90pct(b: &mut Bencher) {
#[bench]
fn bench_grow_resizeable(b: &mut Bencher) {
b.iter(|| {
let mut f = Filter::new_resizeable(0, 10000, 0.01);
let mut f = Filter::new_resizeable(0, 10000, 0.01).unwrap();
for i in 0u64.. {
if f.insert_duplicated(i).is_err() {
break;
Expand All @@ -76,7 +76,7 @@ fn bench_grow_resizeable(b: &mut Bencher) {

#[bench]
fn bench_shrink(b: &mut Bencher) {
let mut f = Filter::new(10000, 0.01);
let mut f = Filter::new(10000, 0.01).unwrap();
for i in 0..f.capacity() {
let _ = f.insert(i);
}
Expand Down
18 changes: 13 additions & 5 deletions fuzz/fuzz_targets/fuzz_fingerprint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,7 @@ struct Input {
}

fuzz_target!(|input: Input| {
let Input {
cap,
ops,
fp_size,
} = input;
let Input { cap, ops, fp_size } = input;
// The "Model", tracks the count for each item
let mut counts = [0u64; (u16::MAX as usize) + 1];
let Ok(mut f) = qfilter::Filter::with_fingerprint_size(cap as u64, fp_size.clamp(7, 64)) else {
Expand Down Expand Up @@ -60,6 +56,18 @@ fuzz_target!(|input: Input| {
let est = f.count_fingerprint(e as u64);
assert!(est >= min, "{e}: est {est} < min {min} shrunk {shrunk:?}");
}
let prints = f.fingerprints().collect::<Vec<_>>();
let mut expected_prints = counts
.iter()
.enumerate()
.flat_map(|(i, n)| {
let t = (i as u64) << (64 - f.fingerprint_size()) >> (64 - f.fingerprint_size());
std::iter::repeat(t).take(*n as usize)
})
.collect::<Vec<_>>();
expected_prints.sort_unstable();
assert_eq!(prints.len(), f.len() as usize);
assert_eq!(prints, expected_prints);
if !CHECK_SHRUNK {
break;
}
Expand Down
26 changes: 13 additions & 13 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -334,14 +334,14 @@ pub struct FingerprintIter<'a> {
}

impl<'a> FingerprintIter<'a> {
fn new(filter: &'a Filter, ordered: bool) -> Self {
fn new(filter: &'a Filter) -> Self {
let mut iter = FingerprintIter {
filter,
q_bucket_idx: 0,
r_bucket_idx: 0,
remaining: filter.len,
};
if ordered && !filter.is_empty() {
if !filter.is_empty() {
while !filter.is_occupied(iter.q_bucket_idx) {
iter.q_bucket_idx += 1;
}
Expand Down Expand Up @@ -1248,13 +1248,13 @@ impl Filter {

/// Returns an iterator over the fingerprints stored in the filter.
///
/// If `ordered` is true the fingerprints will be returned in ascending order.
pub fn fingerprints(&self, ordered: bool) -> FingerprintIter {
FingerprintIter::new(self, ordered)
/// Fingerprints will be returned in ascending order.
pub fn fingerprints(&self) -> FingerprintIter {
FingerprintIter::new(self)
}

/// Shrinks the capacity of the finger as much as possible while preserving
/// the false positive ratios
/// Shrinks the capacity of the filter as much as possible while preserving
/// the false positive ratios and fingerprint size.
pub fn shrink_to_fit(&mut self) {
if self.total_blocks().get() > 1 && self.len() <= self.capacity() / 2 {
let mut new = Self::with_qr(
Expand All @@ -1263,10 +1263,11 @@ impl Filter {
)
.unwrap();
new.max_qbits = self.max_qbits;
for hash in self.fingerprints(true) {
for hash in self.fingerprints() {
let _ = new.insert_fingerprint(true, hash);
}
assert_eq!(new.len, self.len);
debug_assert_eq!(new.len, self.len);
debug_assert_eq!(new.fingerprint_size(), self.fingerprint_size());
*self = new;
}
}
Expand All @@ -1288,7 +1289,7 @@ impl Filter {
if other.fingerprint_size() < self.fingerprint_size() {
return Err(Error::IncompatibleFingerprintSize);
}
for hash in other.fingerprints(true) {
for hash in other.fingerprints() {
self.insert_fingerprint(keep_duplicates, hash)?;
}
Ok(())
Expand All @@ -1312,7 +1313,7 @@ impl Filter {
let rbits = NonZeroU8::new(self.rbits.get() - 1).unwrap();
let mut new = Self::with_qr(qbits, rbits).unwrap();
new.max_qbits = self.max_qbits;
for hash in self.fingerprints(true) {
for hash in self.fingerprints() {
new.insert_fingerprint(true, hash).unwrap();
}
assert_eq!(self.len, new.len);
Expand Down Expand Up @@ -1739,9 +1740,8 @@ mod tests {
for h in fingerprints {
filter.insert_fingerprint(true, h).unwrap();
}
let mut out = filter.fingerprints(true).collect::<Vec<_>>();
let out: Vec<u64> = filter.fingerprints().collect::<Vec<_>>();
let mut expect = fingerprints.map(|h| h << (64 - fip_size) >> (64 - fip_size));
out.sort_unstable();
expect.sort_unstable();
assert_eq!(out, expect);
}
Expand Down

0 comments on commit 9ecb85e

Please sign in to comment.