1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
use sha2::{Digest, Sha256};
#[derive(Debug, PartialEq, PartialOrd)]
pub struct BloomFilter {
bits: Vec<bool>,
nhashes: usize,
}
impl BloomFilter {
pub fn new(size: usize, nhashes: usize) -> Self {
BloomFilter {
bits: vec![false; size],
nhashes,
}
}
pub fn compute_expansion(p: f64) -> f64 {
-1.44 * p.log2()
}
pub fn compute_nhashes(p: f64) -> usize {
(-p.log2()).ceil() as usize
}
pub fn with_false_positive_prob(p: f64, n: usize) -> Self {
Self::new(
(Self::compute_expansion(p) * n as f64).ceil() as usize,
Self::compute_nhashes(p),
)
}
pub fn len(&self) -> usize {
self.bits.len()
}
pub fn nhashes(&self) -> usize {
self.nhashes
}
pub fn bins(&self) -> &[bool] {
&self.bits
}
pub fn as_bytes(&self) -> Vec<u8> {
crate::utils::pack_bits(self.bins())
}
pub fn from_bytes(bytes: &[u8], size: usize, nhashes: usize) -> Self {
let bits = crate::utils::unpack_bits(bytes, size);
BloomFilter { bits, nhashes }
}
pub fn bin<V: AsRef<[u8]>>(value: &V, hash_index: usize) -> usize {
debug_assert_eq!(std::mem::size_of::<usize>(), 8);
let mut h = Sha256::new();
h.update((hash_index as u64).to_le_bytes());
h.update(value);
let hbytes = h.finalize();
u64::from_le_bytes(
<[u8; 8]>::try_from(&hbytes[0..8]).expect("We're getting 8 bytes specifically"),
) as usize
}
pub fn insert<V: AsRef<[u8]>>(&mut self, value: &V) {
for hash_index in 0..self.nhashes {
let i = Self::bin(value, hash_index) % self.len();
self.bits[i] = true;
}
}
pub fn contains<V: AsRef<[u8]>>(&mut self, value: &V) -> bool {
(0..self.nhashes).all(|hash_index| {
let i = Self::bin(value, hash_index) % self.len();
self.bits[i]
})
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{AesRng, Block};
use rand::Rng;
#[test]
fn test_bloom_filter_membership() {
let mut rng = AesRng::new();
let n = 1000;
let nhashes = 3;
let mut filter = BloomFilter::new(n, nhashes);
for _ in 0..128 {
let x = rng.gen::<Block>();
filter.insert(&x);
assert!(filter.contains(&x));
}
assert_eq!(
filter,
BloomFilter::from_bytes(&filter.as_bytes(), n, nhashes)
);
}
}