117 lines
2.2 KiB
C
117 lines
2.2 KiB
C
#include "stdinc.h"
|
|
#include "dat.h"
|
|
#include "fns.h"
|
|
|
|
Bloom b;
|
|
|
|
void
|
|
usage(void)
|
|
{
|
|
fprint(2, "usage: fmtbloom [-s size] [-n nblocks | -N nhash] file\n");
|
|
threadexitsall(0);
|
|
}
|
|
|
|
void
|
|
threadmain(int argc, char *argv[])
|
|
{
|
|
Part *part;
|
|
char *file;
|
|
vlong bits, size, size2;
|
|
int nhash;
|
|
vlong nblocks;
|
|
|
|
ventifmtinstall();
|
|
statsinit();
|
|
|
|
size = 0;
|
|
nhash = 0;
|
|
nblocks = 0;
|
|
ARGBEGIN{
|
|
case 'n':
|
|
if(nhash || nblocks)
|
|
usage();
|
|
nblocks = unittoull(EARGF(usage()));
|
|
break;
|
|
case 'N':
|
|
if(nhash || nblocks)
|
|
usage();
|
|
nhash = unittoull(EARGF(usage()));
|
|
if(nhash > BloomMaxHash){
|
|
fprint(2, "maximum possible is -N %d", BloomMaxHash);
|
|
usage();
|
|
}
|
|
break;
|
|
case 's':
|
|
size = unittoull(ARGF());
|
|
if(size == ~0)
|
|
usage();
|
|
break;
|
|
default:
|
|
usage();
|
|
break;
|
|
}ARGEND
|
|
|
|
if(argc != 1)
|
|
usage();
|
|
|
|
file = argv[0];
|
|
|
|
part = initpart(file, ORDWR|ODIRECT);
|
|
if(part == nil)
|
|
sysfatal("can't open partition %s: %r", file);
|
|
|
|
if(size == 0)
|
|
size = part->size;
|
|
|
|
if(size < 1024*1024)
|
|
sysfatal("bloom filter too small");
|
|
|
|
if(size > MaxBloomSize){
|
|
fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n",
|
|
size, (vlong)MaxBloomSize);
|
|
size = MaxBloomSize;
|
|
}
|
|
if(size&(size-1)){
|
|
for(size2=1; size2<size; size2*=2)
|
|
;
|
|
size = size2/2;
|
|
fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024);
|
|
}
|
|
|
|
if(nblocks){
|
|
/*
|
|
* no use for more than 32 bits per block
|
|
* shoot for less than 64 bits per block
|
|
*/
|
|
size2 = size;
|
|
while(size2*8 >= nblocks*64)
|
|
size2 >>= 1;
|
|
if(size2 != size){
|
|
size = size2;
|
|
fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n",
|
|
size/1024/1024);
|
|
}
|
|
|
|
/*
|
|
* optimal is to use ln 2 times as many hash functions as we have bits per blocks.
|
|
*/
|
|
bits = (8*size)/nblocks;
|
|
nhash = bits*7/10;
|
|
if(nhash > BloomMaxHash)
|
|
nhash = BloomMaxHash;
|
|
}
|
|
if(!nhash)
|
|
nhash = BloomMaxHash;
|
|
if(bloominit(&b, size, nil) < 0)
|
|
sysfatal("bloominit: %r");
|
|
b.nhash = nhash;
|
|
bits = nhash*10/7;
|
|
nblocks = (8*size)/bits;
|
|
fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size, nhash, nblocks);
|
|
b.data = vtmallocz(size);
|
|
b.part = part;
|
|
if(writebloom(&b) < 0)
|
|
sysfatal("writing %s: %r", file);
|
|
threadexitsall(0);
|
|
}
|