布隆过滤器


1、核心代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
public class SimpleBloomFilter {

private static Logger logger = LoggerFactory.getLogger(SimpleBloomFilter.class);

private static final List<Integer> SEEDS = Arrays.asList(7, 11, 13, 31, 37, 61, 67);

private int defaultSize;
private List<SimpleHash> functions;
private BitSet bits;

public static SimpleBloomFilter of(Collection<String> strings) {
int bitSize = 2 << 15;
if (strings != null && !strings.isEmpty()) {
while (bitSize / strings.size() < 20) {
bitSize *= 2;
}
}

if (logger.isDebugEnabled()) {
logger.debug("SimpleBloomFilter bit size is " + (bitSize / 8 / 1024) + "KB");
}

SimpleBloomFilter filter = new SimpleBloomFilter(bitSize);
if (strings != null) {
for (String s : strings) {
filter.add(s);
}
}
return filter;
}

private SimpleBloomFilter(int defaultSize) {
this.defaultSize = defaultSize;
this.bits = new BitSet(defaultSize);
this.functions = SEEDS.stream().map(o -> new SimpleHash(defaultSize, o)).collect(Collectors.toList());
}

public void add(String value) {
for (SimpleHash f : functions) {
bits.set(f.hash(value), true);
}
}

public boolean contains(String value) {
if (value == null) {
return false;
}
boolean ret = true;
for (SimpleHash f : functions) {
ret = ret && bits.get(f.hash(value));
}
return ret;
}

public static class SimpleHash {

private int cap;
private int seed;

public SimpleHash(int cap, int seed) {
this.cap = cap;
this.seed = seed;
}

public int hash(String value) {
int result = 0;
int len = value.length();
for (int i = 0; i < len; i++) {
result = seed * result + value.charAt(i);
}
return (cap - 1) & result;
}

}

}

2、使用

1
2
3
4
5
6
7
8
9
10
SimpleBloomFilter bloomFilter = SimpleBloomFilter.of(toFilterPostIds);
for (String postId : postIds) {
if (!bloomFilter.contains(postId)) {
result.add(postId);

if (result.size() >= LIST_SIZE) {
break;
}
}
}
打赏
  • 版权声明: 本网站所有文章除特别声明外,均采用 Apache License 2.0 许可协议。转载请注明出处!
  1. © 2020-2021 Lauy    湘ICP备20003709号-1

请我喝杯咖啡吧~

支付宝
微信