return of venti

This commit is contained in:
rsc 2005-07-12 15:23:36 +00:00
parent 88bb285e3d
commit a0d146edd7
68 changed files with 14443 additions and 2 deletions

170
src/cmd/venti/copy.c Normal file
View File

@ -0,0 +1,170 @@
#include <u.h>
#include <libc.h>
#include <venti.h>
#include <libsec.h>
#include <thread.h>
int changes;
int rewrite;
int ignoreerrors;
int fast;
int verbose;
VtConn *zsrc, *zdst;
/*
 * Print the command synopsis on standard error and terminate
 * every thread with the exit status "usage".
 */
void
usage(void)
{
	static char synopsis[] = "usage: copy [-fir] [-t type] srchost dsthost score\n";

	fprint(2, "%s", synopsis);
	threadexitsall("usage");
}
/*
 * Copy the block named by score (of venti type `type') from zsrc to
 * zdst, after first recursively copying every block it refers to.
 *
 * score may be modified in place: with the rewrite flag set, a block
 * that cannot be read from the source has its score replaced by
 * vtzeroscore and the global `changes' is incremented.  Callers
 * therefore repack their own buffer after each recursive call.
 *
 * base is handed unchanged to the recursive calls made for pointer
 * blocks; directory entries start a new walk with e.type&VtTypeBaseMask.
 *
 * Read failures are fatal unless rewrite or ignoreerrors is set;
 * write failures are always fatal.
 */
void
walk(uchar score[VtScoreSize], uint type, int base)
{
	int i, n;
	uchar *buf;
	VtEntry e;
	VtRoot root;

	/* nothing to copy for the zero score */
	if(memcmp(score, vtzeroscore, VtScoreSize) == 0)
		return;

	buf = vtmallocz(VtMaxLumpSize);

	/* fast mode: a block the destination can already serve is not descended into */
	if(fast && vtread(zdst, score, type, buf, VtMaxLumpSize) >= 0){
		if(verbose)
			fprint(2, "skip %V\n", score);
		free(buf);
		return;
	}

	n = vtread(zsrc, score, type, buf, VtMaxLumpSize);
	if(n < 0){
		if(rewrite){
			changes++;
			memmove(score, vtzeroscore, VtScoreSize);
		}else if(!ignoreerrors)
			sysfatal("reading block %V (type %d): %r", score, type);
		/* the block is skipped: release the buffer before returning */
		free(buf);
		return;
	}

	switch(type){
	case VtRootType:
		if(vtrootunpack(&root, buf) < 0){
			fprint(2, "warning: could not unpack root in %V %d\n", score, type);
			break;
		}
		walk(root.score, VtDirType, 0);
		walk(root.prev, VtRootType, 0);
		vtrootpack(&root, buf);	/* walk might have changed score */
		break;

	case VtDirType:
		for(i=0; i<n/VtEntrySize; i++){
			if(vtentryunpack(&e, buf, i) < 0){
				fprint(2, "warning: could not unpack entry #%d in %V %d\n", i, score, type);
				continue;
			}
			if(!(e.flags & VtEntryActive))
				continue;
			walk(e.score, e.type, e.type&VtTypeBaseMask);
			/* walk might have changed e.score */
			vtentrypack(&e, buf, i);
		}
		break;

	case VtDataType:
		break;

	default:	/* pointers */
		/* children are walked in place inside buf, so rewrites land in the block */
		for(i=0; i<n; i+=VtScoreSize)
			if(memcmp(buf+i, vtzeroscore, VtScoreSize) != 0)
				walk(buf+i, type-1, base);
		break;
	}

	if(vtwrite(zdst, score, type, buf, n) < 0){
		/* figure out score for better error message */
		/* can't use input argument - might have changed contents */
		n = vtzerotruncate(type, buf, n);
		sha1(buf, n, score, nil);
		sysfatal("writing block %V (type %d): %r", score, type);
	}
	free(buf);
}
/*
 * Entry point of copy: parse options, connect to the source and
 * destination servers named by argv[0] and argv[1], determine the
 * type of the block named by the score in argv[2], and copy the
 * tree rooted there with walk().  Finishes by syncing the
 * destination server.
 */
void
threadmain(int argc, char *argv[])
{
int type, n;
uchar score[VtScoreSize];
uchar *buf;
char *prefix;
fmtinstall('F', vtfcallfmt);
fmtinstall('V', vtscorefmt);
/* -1 means no -t option was given; the type is probed for below */
type = -1;
ARGBEGIN{
case 'f':
fast = 1;
break;
case 'i':
/* -i and -r cannot be combined */
if(rewrite)
usage();
ignoreerrors = 1;
break;
case 'r':
if(ignoreerrors)
usage();
rewrite = 1;
break;
case 't':
type = atoi(EARGF(usage()));
break;
default:
usage();
break;
}ARGEND
if(argc != 3)
usage();
/* prefix is filled in by vtparsescore and only used in the final report */
if(vtparsescore(argv[2], &prefix, score) < 0)
sysfatal("could not parse score: %r");
buf = vtmallocz(VtMaxLumpSize);
zsrc = vtdial(argv[0]);
if(zsrc == nil)
sysfatal("could not dial src server: %r");
if(vtconnect(zsrc) < 0)
sysfatal("vtconnect src: %r");
zdst = vtdial(argv[1]);
if(zdst == nil)
sysfatal("could not dial dst server: %r");
if(vtconnect(zdst) < 0)
sysfatal("vtconnect dst: %r");
if(type != -1){
/* explicit type: just check that the source has the block */
n = vtread(zsrc, score, type, buf, VtMaxLumpSize);
if(n < 0)
sysfatal("could not read block: %r");
}else{
/* no type given: use the first type under which the source can read the score */
for(type=0; type<VtMaxType; type++){
n = vtread(zsrc, score, type, buf, VtMaxLumpSize);
if(n >= 0)
break;
}
if(type == VtMaxType)
sysfatal("could not find block %V of any type", score);
}
/* walk reads the block again itself; buf above was only used for probing */
walk(score, type, VtDirType);
/* walk may have rewritten score; report the resulting root when it did */
if(changes)
print("%s:%V (%d pointers rewritten)\n", prefix, score, changes);
if(vtsync(zdst) < 0)
sysfatal("could not sync dst server: %r");
threadexitsall(0);
}

80
src/cmd/venti/devnull.c Normal file
View File

@ -0,0 +1,80 @@
/* Copyright (c) 2004 Russ Cox */
#include <u.h>
#include <libc.h>
#include <venti.h>
#include <thread.h>
#include <libsec.h>
#ifndef _UNISTD_H_
#pragma varargck type "F" VtFcall*
#pragma varargck type "T" void
#endif
int verbose;
enum
{
STACK = 8192,
};
/*
 * Print the command synopsis on standard error and terminate
 * every thread with the exit status "usage".
 */
void
usage(void)
{
	static char synopsis[] = "usage: venti/devnull [-v] [-a address]\n";

	fprint(2, "%s", synopsis);
	threadexitsall("usage");
}
/*
 * Entry point of devnull: listen for venti requests on `address'
 * (default tcp!*!venti, overridden by -a) and answer them without
 * storing anything.  Reads are answered with the error
 * "no such block"; for writes the reply score is filled in by
 * packetsha1 from the request data; ping, goodbye and sync get a
 * plain reply.  -v logs every request and response on standard error.
 */
void
threadmain(int argc, char **argv)
{
	VtReq *r;
	VtSrv *srv;
	char *address;

	fmtinstall('V', vtscorefmt);
	fmtinstall('F', vtfcallfmt);

	address = "tcp!*!venti";

	ARGBEGIN{
	case 'v':
		verbose++;
		break;
	case 'a':
		address = EARGF(usage());
		break;
	default:
		usage();
	}ARGEND

	srv = vtlisten(address);
	if(srv == nil)
		/* report the address actually used, not a leftover argv slot */
		sysfatal("vtlisten %s: %r", address);

	while((r = vtgetreq(srv)) != nil){
		/* default reply type is the request type plus one */
		r->rx.msgtype = r->tx.msgtype+1;
		if(verbose)
			fprint(2, "<- %F\n", &r->tx);
		switch(r->tx.msgtype){
		case VtTping:
			break;
		case VtTgoodbye:
			break;
		case VtTread:
			r->rx.error = vtstrdup("no such block");
			r->rx.msgtype = VtRerror;
			break;
		case VtTwrite:
			packetsha1(r->tx.data, r->rx.score);
			break;
		case VtTsync:
			break;
		}
		if(verbose)
			fprint(2, "-> %F\n", &r->rx);
		vtrespond(r);
	}
	threadexitsall(nil);
}

View File

@ -1,3 +1,13 @@
%:VQ:
echo venti will return once it is debugged.
<$PLAN9/src/mkhdr
DIRS=srv
TARG=\
copy\
read\
sync\
write\
<$PLAN9/src/mkmany
<$PLAN9/src/mkdirs

59
src/cmd/venti/mkroot.c Normal file
View File

@ -0,0 +1,59 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
char *host;
/*
 * Print the command synopsis on standard error and terminate
 * every thread with the exit status "usage".
 */
void
usage(void)
{
	static char synopsis[] = "usage: mkroot [-h host] name type score blocksize prev\n";

	fprint(2, "%s", synopsis);
	threadexitsall("usage");
}
/*
 * Entry point of mkroot: build a VtRoot from the five positional
 * arguments (name, type, score, blocksize, prev), write it to the
 * venti server selected by -h (or the default when host is nil)
 * as a VtRootType block, sync, and print the score of the new block.
 */
void
threadmain(int argc, char *argv[])
{
	uchar score[VtScoreSize];
	uchar buf[VtRootSize];
	VtConn *z;
	VtRoot root;

	ARGBEGIN{
	case 'h':
		host = EARGF(usage());
		break;
	default:
		usage();
		break;
	}ARGEND

	if(argc != 5)
		usage();

	ventifmtinstall();

	/*
	 * Start from an all-zero root so that fields not assigned below,
	 * and the bytes after the terminating NUL of name and type, do
	 * not carry stack garbage into the packed block.
	 * NOTE(review): presumably vtrootpack copies the whole name/type
	 * arrays, which would make the block's score depend on them.
	 */
	memset(&root, 0, sizeof root);
	strecpy(root.name, root.name+sizeof root.name, argv[0]);
	strecpy(root.type, root.type+sizeof root.type, argv[1]);
	/* same three-argument vtparsescore as the other venti commands; no prefix wanted */
	if(vtparsescore(argv[2], nil, root.score) < 0)
		sysfatal("bad score '%s'", argv[2]);
	root.blocksize = atoi(argv[3]);
	if(vtparsescore(argv[4], nil, root.prev) < 0)
		sysfatal("bad score '%s'", argv[4]);
	vtrootpack(&root, buf);

	z = vtdial(host);
	if(z == nil)
		sysfatal("could not connect to server: %r");

	if(vtconnect(z) < 0)
		sysfatal("vtconnect: %r");

	/* vtwrite fills in score for the block just written */
	if(vtwrite(z, score, VtRootType, buf, VtRootSize) < 0)
		sysfatal("vtwrite: %r");
	if(vtsync(z) < 0)
		sysfatal("vtsync: %r");
	vthangup(z);

	print("%V\n", score);
	threadexitsall(0);
}

334
src/cmd/venti/randtest.c Normal file
View File

@ -0,0 +1,334 @@
#include <u.h>
#include <libc.h>
#include <venti.h>
#include <libsec.h>
#include <thread.h>
enum { STACK = 32768 };
void xxxsrand(long);
long xxxlrand(void);
Channel *cw;
Channel *cr;
char *host;
int blocksize, seed, randpct;
int doread, dowrite, packets, permute;
vlong totalbytes, cur;
VtConn *z;
int multi;
int maxpackets;
int sequence;
int doublecheck = 1;
uint *order;
/*
 * Print the command synopsis on standard error and terminate
 * every thread with the exit status "usage".
 * The synopsis lists exactly the options threadmain accepts.
 */
void
usage(void)
{
	fprint(2, "usage: randtest [-PrSVw] [-b blocksize] [-h host] [-M maxblocks] [-m nprocs] [-n totalbytes] [-p randpct] [-s seed]\n");
	threadexitsall("usage");
}
/*
 * Write the blocksize bytes at buf to the server as a data block.
 * When doublecheck is set, the SHA1 of the block is computed locally
 * and compared with the score the write produced; a mismatch is fatal.
 * buf2 is unused; it is there so wr and rd share a signature.
 */
void
wr(char *buf, char *buf2)
{
	uchar score[VtScoreSize], score2[VtScoreSize];
	DigestState ds;

	USED(buf2);
	/* keep the error message below well defined when doublecheck is off */
	memset(score, 0, sizeof score);
	memset(&ds, 0, sizeof ds);
	if(doublecheck)
		sha1((uchar*)buf, blocksize, score, &ds);
	if(vtwrite(z, score2, VtDataType, (uchar*)buf, blocksize) < 0)
		sysfatal("vtwrite %V at %,lld: %r", score, cur);
	if(doublecheck && memcmp(score, score2, VtScoreSize) != 0)
		sysfatal("score mismatch! %V %V", score, score2);
}
/*
 * Writer proc: take block pointers from channel cw, write each one
 * with wr() and free it.  A nil pointer ends the loop.
 */
void
wrthread(void *v)
{
	char *block;

	USED(v);
	for(;;){
		block = recvp(cw);
		if(block == nil)
			break;
		wr(block, nil);
		free(block);
	}
}
/*
 * Read back the block whose expected contents are in buf: compute
 * its SHA1 score, fetch that score from the server into buf2, and
 * compare the first blocksize bytes of the two buffers.  A failed
 * read or differing data is fatal.
 */
void
rd(char *buf, char *buf2)
{
	uchar score[VtScoreSize];
	DigestState ds;
	int nread;

	memset(&ds, 0, sizeof ds);
	sha1((uchar*)buf, blocksize, score, &ds);

	nread = vtread(z, score, VtDataType, (uchar*)buf2, blocksize);
	if(nread < 0)
		sysfatal("vtread %V at %,lld: %r", score, cur);

	if(memcmp(buf, buf2, blocksize) != 0)
		sysfatal("bad data read! %V", score);
}
/*
 * Reader proc: take expected-block pointers from channel cr, verify
 * each against the server with rd() using a private scratch buffer,
 * and free it.  A nil pointer ends the loop.
 */
void
rdthread(void *v)
{
	char *expect, *scratch;

	USED(v);
	scratch = vtmalloc(blocksize);
	for(;;){
		expect = recvp(cr);
		if(expect == nil)
			break;
		rd(expect, scratch);
		free(expect);
	}
}
char *template;
/*
 * Generate the test blocks and hand each one to a worker.
 *
 * totalbytes/blocksize blocks are produced, capped at maxpackets
 * (which defaults to the full count the first time through).  Each
 * block is a copy of `template' with its first uint replaced by the
 * block's number; with permute set the numbers are shuffled first.
 *
 * If c is non-nil each block is sent on c, and the receiver owns and
 * frees it; otherwise fn(buf, buf2) is called directly and the same
 * buffer is reused.  cur tracks the byte offset for error messages.
 */
void
run(void (*fn)(char*, char*), Channel *c)
{
	int i, t, j, packets;
	char *buf2, *buf;

	buf2 = vtmalloc(blocksize);
	buf = vtmalloc(blocksize);
	cur = 0;
	packets = totalbytes/blocksize;
	if(maxpackets == 0)
		maxpackets = packets;
	order = vtmalloc(packets*sizeof order[0]);
	for(i=0; i<packets; i++)
		order[i] = i;
	if(permute){
		/* swap each slot with a randomly chosen slot at or before it */
		for(i=1; i<packets; i++){
			j = nrand(i+1);
			t = order[i];
			order[i] = order[j];
			order[j] = t;
		}
	}
	for(i=0; i<packets && i<maxpackets; i++){
		memmove(buf, template, blocksize);
		*(uint*)buf = order[i];
		if(c){
			sendp(c, buf);
			/* the receiver now owns the old buffer; get a fresh one */
			buf = vtmalloc(blocksize);
		}else
			(*fn)(buf, buf2);
		cur += blocksize;
	}
	free(order);
	/*
	 * buf is either the reused direct-call buffer or the spare
	 * allocated after the last send; neither is referenced elsewhere.
	 */
	free(buf);
	free(buf2);
}
#define TWID64 ((u64int)~(u64int)0)
/*
 * Parse a number with an optional binary-unit suffix:
 * k/K (2^10), m/M (2^20), g/G (2^30) or t/T (2^40).
 * The number itself may be in any base strtoull accepts with base 0.
 * Returns TWID64 if s is nil or anything follows the number and
 * its optional suffix.
 */
u64int
unittoull(char *s)
{
	char *es;
	u64int n;

	if(s == nil)
		return TWID64;
	/* strtoull rather than strtoul: ulong may be only 32 bits wide */
	n = strtoull(s, &es, 0);
	if(*es == 'k' || *es == 'K'){
		n *= 1024;
		es++;
	}else if(*es == 'm' || *es == 'M'){
		n *= 1024*1024;
		es++;
	}else if(*es == 'g' || *es == 'G'){
		n *= 1024*1024*1024;
		es++;
	}else if(*es == 't' || *es == 'T'){
		/* two steps so the constant stays within int range */
		n *= 1024*1024;
		n *= 1024*1024;
		/* consume the suffix, as the other units do */
		es++;
	}
	if(*es != '\0')
		return TWID64;
	return n;
}
void
threadmain(int argc, char *argv[])
{
int i, max;
vlong t0;
double t;
blocksize = 8192;
seed = 0;
randpct = 50;
host = nil;
doread = 0;
dowrite = 0;
totalbytes = 1*1024*1024*1024;
fmtinstall('V', vtscorefmt);
fmtinstall('F', vtfcallfmt);
ARGBEGIN{
case 'b':
blocksize = unittoull(EARGF(usage()));
break;
case 'h':
host = EARGF(usage());
break;
case 'M':
maxpackets = unittoull(EARGF(usage()));
break;
case 'm':
multi = atoi(EARGF(usage()));
break;
case 'n':
totalbytes = unittoull(EARGF(usage()));
break;
case 'p':
randpct = atoi(EARGF(usage()));
break;
case 'P':
permute = 1;
break;
case 'S':
doublecheck = 0;
ventidoublechecksha1 = 0;
break;
case 's':
seed = atoi(EARGF(usage()));
break;
case 'r':
doread = 1;
break;
case 'w':
dowrite = 1;
break;
case 'V':
chattyventi++;
break;
default:
usage();
}ARGEND
if(doread==0 && dowrite==0){
doread = 1;
dowrite = 1;
}
z = vtdial(host);
if(z == nil)
sysfatal("could not connect to server: %r");
if(vtconnect(z) < 0)
sysfatal("vtconnect: %r");
if(multi){
cr = chancreate(sizeof(void*), 0);
cw = chancreate(sizeof(void*), 0);
for(i=0; i<multi; i++){
proccreate(wrthread, nil, STACK);
proccreate(rdthread, nil, STACK);
}
}
template = vtmalloc(blocksize);
xxxsrand(seed);
max = (256*randpct)/100;
if(max == 0)
max = 1;
for(i=0; i<blocksize; i++)
template[i] = xxxlrand()%max;
if(dowrite){
t0 = nsec();
run(wr, cw);
for(i=0; i<multi; i++)
sendp(cw, nil);
t = (nsec() - t0)/1.e9;
print("write: %lld bytes / %.3f seconds = %.6f MB/s\n",
totalbytes, t, (double)totalbytes/1e6/t);
}
if(doread){
t0 = nsec();
run(rd, cr);
for(i=0; i<multi; i++)
sendp(cr, nil);
t = (nsec() - t0)/1.e9;
print("read: %lld bytes / %.3f seconds = %.6f MB/s\n",
totalbytes, t, (double)totalbytes/1e6/t);
}
threadexitsall(nil);
}
/*
* algorithm by
* D. P. Mitchell & J. A. Reeds
*/
#define LEN 607
#define TAP 273
#define MASK 0x7fffffffL
#define A 48271
#define M 2147483647
#define Q 44488
#define R 3399
#define NORM (1.0/(1.0+MASK))
static ulong rng_vec[LEN];
static ulong* rng_tap = rng_vec;
static ulong* rng_feed = 0;
/*
 * Seed the generator: reset the tap and feed pointers and fill
 * rng_vec with successive values of x[n+1] = 48271 * x[n] mod (2**31 - 1),
 * discarding the first 20 values.  The seed is reduced into [1, M-1];
 * a seed of zero is replaced by a fixed constant.
 */
static void
isrand(long seed)
{
	long quot, rem, state;
	int step;

	rng_tap = rng_vec;
	rng_feed = rng_vec+LEN-TAP;

	seed = seed%M;
	if(seed < 0)
		seed += M;
	if(seed == 0)
		seed = 89482311;
	state = seed;

	/* the first 20 steps only warm up; the next LEN fill the vector */
	for(step = 0; step < 20+LEN; step++){
		quot = state / Q;
		rem = state % Q;
		state = A*rem - R*quot;
		if(state < 0)
			state += M;
		if(step >= 20)
			rng_vec[step-20] = state;
	}
}
/*
 * Public seeding entry point; simply forwards to isrand.
 */
void
xxxsrand(long seed)
{
	isrand(seed);
}
/*
 * Return the next pseudo-random value, masked to 31 bits.
 * The tap and feed pointers step backwards through rng_vec, wrapping
 * to the end; the masked sum of the two entries replaces the feed
 * entry and is returned.  If the generator was never seeded, it is
 * seeded with 1 the first time the tap pointer wraps.
 */
long
xxxlrand(void)
{
	ulong sum;

	if(--rng_tap < rng_vec){
		if(rng_feed == 0){
			isrand(1);
			rng_tap--;
		}
		rng_tap += LEN;
	}
	if(--rng_feed < rng_vec)
		rng_feed += LEN;

	sum = (*rng_feed + *rng_tap) & MASK;
	*rng_feed = sum;
	return sum;
}

75
src/cmd/venti/read.c Normal file
View File

@ -0,0 +1,75 @@
#include <u.h>
#include <libc.h>
#include <venti.h>
#include <libsec.h>
#include <thread.h>
/*
 * Print the command synopsis on standard error and terminate
 * every thread with the exit status "usage".
 */
void
usage(void)
{
	static char synopsis[] = "usage: read [-h host] [-t type] score\n";

	fprint(2, "%s", synopsis);
	threadexitsall("usage");
}
/*
 * Entry point of read: fetch the block named by the score in argv[0]
 * from the venti server (-h host, or the default when host is nil)
 * and copy its contents to standard output.
 *
 * With -t the block is read as that type.  Without it every type
 * from 0 up to VtMaxType is tried in turn, and the equivalent
 * explicit command line is printed on standard error for the first
 * type that works.
 */
void
threadmain(int argc, char *argv[])
{
	int type, n;
	uchar score[VtScoreSize];
	uchar *buf;
	VtConn *z;
	char *host;

	fmtinstall('F', vtfcallfmt);
	fmtinstall('V', vtscorefmt);

	host = nil;
	/* -1 means no -t option: probe for the type below */
	type = -1;
	ARGBEGIN{
	case 'h':
		host = EARGF(usage());
		break;
	case 't':
		/* EARGF consumes the option's argument, as -h does */
		type = atoi(EARGF(usage()));
		break;
	default:
		usage();
		break;
	}ARGEND

	if(argc != 1)
		usage();

	if(vtparsescore(argv[0], nil, score) < 0)
		sysfatal("could not parse score '%s': %r", argv[0]);

	buf = vtmallocz(VtMaxLumpSize);

	z = vtdial(host);
	if(z == nil)
		sysfatal("could not connect to server: %r");

	if(vtconnect(z) < 0)
		sysfatal("vtconnect: %r");

	if(type == -1){
		n = -1;
		for(type=0; type<VtMaxType; type++){
			n = vtread(z, score, type, buf, VtMaxLumpSize);
			if(n >= 0){
				fprint(2, "venti/read%s%s %V %d\n", host ? " -h" : "", host ? host : "",
					score, type);
				break;
			}
		}
	}else
		/* type was already set while parsing -t */
		n = vtread(z, score, type, buf, VtMaxLumpSize);

	vthangup(z);
	if(n < 0)
		sysfatal("could not read block: %r");
	if(write(1, buf, n) != n)
		sysfatal("write: %r");

	threadexitsall(0);
}

112
src/cmd/venti/readlist.c Normal file
View File

@ -0,0 +1,112 @@
#include <u.h>
#include <libc.h>
#include <thread.h>
#include <venti.h>
#include <bio.h>
char *host;
Biobuf b;
VtConn *z;
uchar *buf;
void run(Biobuf*);
int nn;
/*
 * Print the command synopsis on standard error and terminate
 * every thread with the exit status "usage".
 */
void
usage(void)
{
	static char synopsis[] = "usage: readlist [-h host] list\n";

	fprint(2, "%s", synopsis);
	threadexitsall("usage");
}
/*
 * Convert the n hexadecimal characters at buf into the binary score.
 * n must be exactly 2*VtScoreSize; upper and lower case digits are
 * both accepted.  Returns 0 on success; on failure returns -1 with
 * the error string set.  score is zeroed first, so on a bad character
 * it holds whatever was converted up to that point.
 */
int
parsescore(uchar *score, char *buf, int n)
{
	int pos, digit;
	char ch;

	memset(score, 0, VtScoreSize);
	if(n != VtScoreSize*2){
		werrstr("score wrong length %d", n);
		return -1;
	}
	for(pos = 0; pos < VtScoreSize*2; pos++){
		ch = buf[pos];
		if('0' <= ch && ch <= '9')
			digit = ch - '0';
		else if('a' <= ch && ch <= 'f')
			digit = ch - 'a' + 10;
		else if('A' <= ch && ch <= 'F')
			digit = ch - 'A' + 10;
		else{
			digit = ch;
			werrstr("bad score char %d '%c'", digit, digit);
			return -1;
		}
		/* even positions are the high nibble of their byte */
		if(pos%2 == 0)
			digit <<= 4;
		score[pos/2] |= digit;
	}
	return 0;
}
/*
 * Entry point of readlist: connect to the venti server (-h host, or
 * the default when host is nil) and process each work list named on
 * the command line with run(), or standard input when none is named.
 */
void
threadmain(int argc, char *argv[])
{
	int fd, i;

	ARGBEGIN{
	case 'h':
		host = EARGF(usage());
		break;
	default:
		usage();
		break;
	}ARGEND

	fmtinstall('V', vtscorefmt);
	buf = vtmallocz(VtMaxLumpSize);
	z = vtdial(host);
	if(z == nil)
		sysfatal("could not connect to server: %r");
	if(vtconnect(z) < 0)
		sysfatal("vtconnect: %r");

	if(argc == 0){
		Binit(&b, 0, OREAD);
		run(&b);
	}else{
		for(i=0; i<argc; i++){
			if((fd = open(argv[i], OREAD)) < 0)
				sysfatal("open %s: %r", argv[i]);
			Binit(&b, fd, OREAD);
			run(&b);
			/*
			 * detach the buffer and release the descriptor before
			 * reusing b for the next list; the descriptor was opened
			 * here, so it is closed here.
			 */
			Bterm(&b);
			close(fd);
		}
	}
	threadexitsall(nil);
}
/*
 * Process one work list: every line must hold exactly two fields,
 * a hexadecimal score and a numeric block type.  Each block is read
 * from the server into the global buf and then ignored; any syntax
 * or read error is fatal.  A progress count is printed every 1000
 * blocks.
 */
void
run(Biobuf *b)
{
	char *line, *field[10];
	int nfield, blocktype, nread;
	uchar score[20];

	for(;;){
		line = Brdline(b, '\n');
		if(line == nil)
			break;
		/* overwrite the newline so the line is a C string */
		line[Blinelen(b)-1] = 0;

		nfield = tokenize(line, field, nelem(field));
		if(nfield != 2)
			sysfatal("syntax error in work list");
		if(parsescore(score, field[0], strlen(field[0])) < 0)
			sysfatal("bad score %s in work list", field[0]);
		blocktype = atoi(field[1]);

		nread = vtread(z, score, blocktype, buf, VtMaxLumpSize);
		if(nread < 0)
			sysfatal("could not read %s %s: %r", field[0], field[1]);

		nn++;
		if(nn%1000 == 0)
			print("%d...", nn);
	}
}

112
src/cmd/venti/ro.c Normal file
View File

@ -0,0 +1,112 @@
/* Copyright (c) 2004 Russ Cox */
#include <u.h>
#include <libc.h>
#include <venti.h>
#include <thread.h>
#include <libsec.h>
#ifndef _UNISTD_H_
#pragma varargck type "F" VtFcall*
#pragma varargck type "T" void
#endif
VtConn *z;
int verbose;
enum
{
STACK = 8192,
};
/*
 * Print the command synopsis on standard error and terminate
 * every thread with the exit status "usage".
 */
void
usage(void)
{
	static char synopsis[] = "usage: venti/ro [-v] [-a address] [-h address]\n";

	fprint(2, "%s", synopsis);
	threadexitsall("usage");
}
void
readthread(void *v)
{
char err[ERRMAX];
VtReq *r;
uchar *buf;
int n;
r = v;
buf = vtmalloc(r->tx.count);
if((n=vtread(z, r->tx.score, r->tx.blocktype, buf, r->tx.count)) < 0){
r->rx.msgtype = VtRerror;
rerrstr(err, sizeof err);
r->rx.error = vtstrdup(err);
free(buf);
}else{
r->rx.data = packetforeign(buf, n, free, buf);
}
if(verbose)
fprint(2, "-> %F\n", &r->rx);
vtrespond(r);
}
/*
 * Entry point of ro: a read-only venti proxy.  Connects to the
 * upstream server given by -h (nil selects the vtdial default),
 * listens on the address given by -a (default tcp!*!venti), and
 * answers requests: reads are forwarded upstream in their own
 * thread, writes are refused with "read-only server", and ping,
 * goodbye and sync get a plain reply.  -v logs traffic on
 * standard error.
 */
void
threadmain(int argc, char **argv)
{
VtReq *r;
VtSrv *srv;
char *address, *ventiaddress;
fmtinstall('F', vtfcallfmt);
fmtinstall('V', vtscorefmt);
address = "tcp!*!venti";
ventiaddress = nil;
ARGBEGIN{
case 'v':
verbose++;
break;
case 'a':
address = EARGF(usage());
break;
case 'h':
ventiaddress = EARGF(usage());
break;
default:
usage();
}ARGEND
if((z = vtdial(ventiaddress)) == nil)
sysfatal("vtdial %s: %r", ventiaddress);
if(vtconnect(z) < 0)
sysfatal("vtconnect: %r");
srv = vtlisten(address);
if(srv == nil)
sysfatal("vtlisten %s: %r", address);
while((r = vtgetreq(srv)) != nil){
/* default reply type is the request type plus one */
r->rx.msgtype = r->tx.msgtype+1;
if(verbose)
fprint(2, "<- %F\n", &r->tx);
switch(r->tx.msgtype){
case VtTping:
break;
case VtTgoodbye:
break;
case VtTread:
/* readthread logs and responds itself, so skip the common tail */
threadcreate(readthread, r, 16384);
continue;
case VtTwrite:
r->rx.error = vtstrdup("read-only server");
r->rx.msgtype = VtRerror;
break;
case VtTsync:
break;
}
if(verbose)
fprint(2, "-> %F\n", &r->rx);
vtrespond(r);
}
threadexitsall(nil);
}

737
src/cmd/venti/srv/arena.c Normal file
View File

@ -0,0 +1,737 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
typedef struct ASum ASum;
struct ASum
{
Arena *arena;
ASum *next;
};
static void sealarena(Arena *arena);
static int okarena(Arena *arena);
static int loadarena(Arena *arena);
static CIBlock *getcib(Arena *arena, int clump, int writing, CIBlock *rock);
static void putcib(Arena *arena, CIBlock *cib);
static void sumproc(void *);
static QLock sumlock;
static Rendez sumwait;
static ASum *sumq;
static uchar zero[8192];
int arenasumsleeptime;
/*
 * Start the background checksum proc.  Binding sumwait to sumlock
 * is also what enables backsumarena, which does nothing while
 * sumwait.l is nil.  Returns 0 on success, -1 if the proc could
 * not be started.
 */
int
initarenasum(void)
{
	sumwait.l = &sumlock;

	if(vtproc(sumproc, nil) >= 0)
		return 0;

	seterr(EOk, "can't start arena checksum slave: %r");
	return -1;
}
/*
 * Create an in-memory Arena for the region of part that starts at
 * base and spans size bytes, and load its state from the disk
 * header and trailer.  The first and last block of the region are
 * excluded from arena->base and arena->size.
 * A sealed arena whose stored score is still the zero score is
 * queued for background checksumming.
 * Returns nil if the arena cannot be loaded or fails okarena.
 */
Arena*
initarena(Part *part, u64int base, u64int size, u32int blocksize)
{
	Arena *a;

	a = MKZ(Arena);
	a->part = part;
	a->blocksize = blocksize;
	a->clumpmax = a->blocksize / ClumpInfoSize;
	a->base = base + blocksize;
	a->size = size - 2 * blocksize;

	if(loadarena(a) < 0){
		seterr(ECorrupt, "arena header or trailer corrupted");
		goto Bad;
	}
	if(okarena(a) < 0)
		goto Bad;

	if(a->diskstats.sealed && scorecmp(zeroscore, a->score)==0)
		backsumarena(a);

	return a;

Bad:
	freearena(a);
	return nil;
}
/*
 * Release an Arena structure; a nil pointer is accepted.
 */
void
freearena(Arena *arena)
{
	if(arena != nil)
		free(arena);
}
/*
 * Create a brand-new arena named `name' in the region of part that
 * starts at base and spans size bytes, and write its trailer, its
 * header, and an initial run of zeros at the start of the arena's
 * data area (at most one block, and at most sizeof zero bytes).
 *
 * Version-4 arenas use the fixed _ClumpMagic; other versions get a
 * random clump magic that is neither _ClumpMagic nor zero.
 * Returns nil if the name is rejected or any write fails.
 */
Arena*
newarena(Part *part, u32int vers, char *name, u64int base, u64int size, u32int blocksize)
{
	int bsize;
	Arena *arena;

	if(nameok(name) < 0){
		/* include the offending name; it was passed but never formatted */
		seterr(EOk, "illegal arena name %s", name);
		return nil;
	}
	arena = MKZ(Arena);
	arena->part = part;
	arena->version = vers;
	if(vers == ArenaVersion4)
		arena->clumpmagic = _ClumpMagic;
	else{
		do
			arena->clumpmagic = fastrand();
		while(arena->clumpmagic==_ClumpMagic || arena->clumpmagic==0);
	}
	arena->blocksize = blocksize;
	arena->clumpmax = arena->blocksize / ClumpInfoSize;
	/* the first and last block of the region hold the header and trailer */
	arena->base = base + blocksize;
	arena->size = size - 2 * blocksize;

	namecp(arena->name, name);

	bsize = sizeof zero;
	if(bsize > arena->blocksize)
		bsize = arena->blocksize;

	if(wbarena(arena)<0 || wbarenahead(arena)<0
	|| writepart(arena->part, arena->base, zero, bsize)<0){
		freearena(arena);
		return nil;
	}

	return arena;
}
/*
 * Read the directory entry for one clump into ci.
 * Returns 0 on success, -1 if the directory block is unavailable.
 */
int
readclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
{
	CIBlock rock, *blk;

	blk = getcib(arena, clump, 0, &rock);
	if(blk == nil)
		return -1;

	unpackclumpinfo(ci, &blk->data->data[blk->offset]);
	putcib(arena, blk);
	return 0;
}
/*
 * Read up to n consecutive directory entries, starting at clump,
 * into cis.  Stops at the first entry that cannot be fetched and
 * returns the number of entries actually read.
 */
int
readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n)
{
	CIBlock rock, *blk;
	int done;

	done = 0;
	while(done < n){
		blk = getcib(arena, clump + done, 0, &rock);
		if(blk == nil)
			break;
		unpackclumpinfo(&cis[done], &blk->data->data[blk->offset]);
		putcib(arena, blk);
		done++;
	}
	return done;
}
/*
 * Write the directory entry for one clump.
 * Must be called with the arena locked.
 * The directory block is marked dirty before the entry is packed
 * into it.  Returns 0 on success, -1 if the block is unavailable.
 */
int
writeclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
{
	CIBlock rock, *blk;

	blk = getcib(arena, clump, 1, &rock);
	if(blk == nil)
		return -1;

	dirtydblock(blk->data, DirtyArenaCib);
	packclumpinfo(ci, &blk->data->data[blk->offset]);
	putcib(arena, blk);
	return 0;
}
/*
 * Size in bytes of the clump directory needed for `clumps' entries:
 * whole blocks of clumpmax entries each, always rounded up by one
 * block (so an exact multiple still gets an extra block).
 * NOTE(review): the product is formed in the operands' own width
 * before being widened to u64int - confirm the field types if very
 * large arenas are possible.
 */
u64int
arenadirsize(Arena *arena, u32int clumps)
{
return ((clumps / arena->clumpmax) + 1) * arena->blocksize;
}
/*
 * read a clump of data starting at arena offset aa into buf.
 * n is a hint of the size of the data, not including the header.
 * the request is clipped so it won't run off the end of the clump
 * storage (the arena minus its clump directory); returns the number
 * of bytes read after clipping.
 * on error, including n == 0, returns -1, which the u32int return
 * type turns into a large unsigned value.
 */
u32int
readarena(Arena *arena, u64int aa, u8int *buf, long n)
{
DBlock *b;
u64int a;
u32int blocksize, off, m;
long nn;
if(n == 0)
return -1;
/* a = first offset past the clump storage; the directory lies beyond it */
qlock(&arena->lock);
a = arena->size - arenadirsize(arena, arena->memstats.clumps);
qunlock(&arena->lock);
if(aa >= a){
seterr(EOk, "reading beyond arena clump storage: clumps=%d aa=%lld a=%lld -1 clumps=%lld\n", arena->memstats.clumps, aa, a, arena->size - arenadirsize(arena, arena->memstats.clumps - 1));
return -1;
}
if(aa + n > a)
n = a - aa;
/* from here on a is a block-aligned partition address and off the offset within that block */
blocksize = arena->blocksize;
a = arena->base + aa;
off = a & (blocksize - 1);
a -= off;
nn = 0;
for(;;){
b = getdblock(arena->part, a, OREAD);
if(b == nil)
return -1;
/* copy what is left of this block, or of the request, whichever is smaller */
m = blocksize - off;
if(m > n - nn)
m = n - nn;
memmove(&buf[nn], &b->data[off], m);
putdblock(b);
nn += m;
if(nn == n)
break;
off = 0;
a += blocksize;
}
return n;
}
/*
 * write n bytes from clbuf to the clump section at arena offset aa.
 * used to fix up corrupted arenas.
 *
 * the whole range must lie inside the clump storage (the arena minus
 * its clump directory).  the data goes into the disk block cache one
 * block at a time and each block is marked dirty; the arena lock is
 * held throughout.
 * returns n on success.  on error, including n == 0, returns -1,
 * which the u32int return type turns into a large unsigned value.
 */
u32int
writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n)
{
	DBlock *b;
	u64int a;
	u32int blocksize, off, m;
	long nn;

	if(n == 0)
		return -1;

	qlock(&arena->lock);
	a = arena->size - arenadirsize(arena, arena->memstats.clumps);
	if(aa >= a || aa + n > a){
		qunlock(&arena->lock);
		seterr(EOk, "writing beyond arena clump storage");
		return -1;
	}

	/* from here on a is a block-aligned partition address and off the offset within that block */
	blocksize = arena->blocksize;
	a = arena->base + aa;
	off = a & (blocksize - 1);
	a -= off;
	nn = 0;
	for(;;){
		/*
		 * a block that is only partly overwritten - because the
		 * write starts inside it or the remaining data ends inside
		 * it - is opened ORDWR; only a fully overwritten block is
		 * opened OWRITE.  the test uses the bytes still to be
		 * written, n - nn, so it stays correct for the last block
		 * of a multi-block write.
		 * NOTE(review): presumably OWRITE skips loading the block's
		 * current contents - confirm against getdblock.
		 */
		b = getdblock(arena->part, a, off != 0 || n - nn < blocksize ? ORDWR : OWRITE);
		if(b == nil){
			qunlock(&arena->lock);
			return -1;
		}
		dirtydblock(b, DirtyArena);
		m = blocksize - off;
		if(m > n - nn)
			m = n - nn;
		memmove(&b->data[off], &clbuf[nn], m);
		putdblock(b);
		nn += m;
		if(nn == n)
			break;
		off = 0;
		a += blocksize;
	}
	qunlock(&arena->lock);
	return n;
}
/*
 * allocate space for the clump at the end of the arena's used area
 * and write it, updating the in-memory stats and the arena directory.
 *
 * clbuf is the scratch buffer the clump is packed into; start is
 * added to arena offsets to form the addresses stored in *pa and
 * handed to setdcachestate.
 * returns the arena offset of the clump, with *pa set to start plus
 * that offset.  returns TWID64 if the arena is sealed or too full
 * (marking it sealed in memory in the latter case), or if packing
 * or fetching a block fails.
 *
 * ZZZ question: should this distinguish between an arena
 * filling up and real errors writing the clump?
 */
u64int
writeaclump(Arena *arena, Clump *c, u8int *clbuf, u64int start, u64int *pa)
{
DBlock *b;
u64int a, aa;
u32int clump, n, nn, m, off, blocksize;
int ok;
AState as;
/* bytes to write: clump data plus ClumpSize plus one extra U32Size */
n = c->info.size + ClumpSize + U32Size;
qlock(&arena->lock);
aa = arena->memstats.used;
/* room is needed for the clump, a further U32Size, and a directory with one more entry */
if(arena->memstats.sealed
|| aa + n + U32Size + arenadirsize(arena, arena->memstats.clumps + 1) > arena->size){
if(!arena->memstats.sealed){
trace(0, "seal memstats %s", arena->name);
arena->memstats.sealed = 1;
as.arena = arena;
as.aa = start+aa;
as.stats = arena->memstats;
setdcachestate(&as);
}
qunlock(&arena->lock);
return TWID64;
}
if(packclump(c, &clbuf[0], arena->clumpmagic) < 0){
qunlock(&arena->lock);
return TWID64;
}
/*
 * write the data out one block at a time
 */
blocksize = arena->blocksize;
a = arena->base + aa;
off = a & (blocksize - 1);
a -= off;
nn = 0;
for(;;){
/* only a block entered part-way through is opened ORDWR */
b = getdblock(arena->part, a, off != 0 ? ORDWR : OWRITE);
if(b == nil){
qunlock(&arena->lock);
return TWID64;
}
dirtydblock(b, DirtyArena);
m = blocksize - off;
if(m > n - nn)
m = n - nn;
memmove(&b->data[off], &clbuf[nn], m);
// ok = writepart(arena->part, a, b->data, blocksize);
/* with the direct write disabled, ok is always 0 and the check below never fires */
ok = 0;
putdblock(b);
if(ok < 0){
qunlock(&arena->lock);
return TWID64;
}
nn += m;
if(nn == n)
break;
off = 0;
a += blocksize;
}
/* used advances by U32Size less than what was written, so the next clump starts over those last bytes */
arena->memstats.used += c->info.size + ClumpSize;
arena->memstats.uncsize += c->info.uncsize;
if(c->info.size < c->info.uncsize)
arena->memstats.cclumps++;
clump = arena->memstats.clumps++;
if(arena->memstats.clumps == 0)
sysfatal("clumps wrapped");
arena->wtime = now();
if(arena->ctime == 0)
arena->ctime = arena->wtime;
/* NOTE(review): the result of writeclumpinfo is not checked */
writeclumpinfo(arena, clump, &c->info);
/* set up for call to setdcachestate */
as.arena = arena;
as.aa = start+arena->memstats.used;
as.stats = arena->memstats;
/* update this before calling setdcachestate so it cannot be behind dcache.diskstate */
*pa = start+aa;
setdcachestate(&as);
qunlock(&arena->lock);
return aa;
}
/*
 * Three-way comparison of two sets of arena tail statistics.
 * Fields are compared in the order used, clumps, cclumps, uncsize,
 * sealed; the first that differs decides.  Returns -1, 0 or 1.
 * Only `used' gives a meaningful ordering; the remaining fields
 * merely make the result zero exactly when everything matches.
 */
int
atailcmp(ATailStats *a, ATailStats *b)
{
	if(a->used != b->used)
		return a->used < b->used ? -1 : 1;
	if(a->clumps != b->clumps)
		return a->clumps < b->clumps ? -1 : 1;
	if(a->cclumps != b->cclumps)
		return a->cclumps < b->cclumps ? -1 : 1;
	if(a->uncsize != b->uncsize)
		return a->uncsize < b->uncsize ? -1 : 1;
	if(a->sealed != b->sealed)
		return a->sealed < b->sealed ? -1 : 1;
	return 0;
}
/*
 * Bring the on-disk tail statistics up to the state described by as.
 * as->arena must be one of mainindex's arenas and as->aa must fall
 * inside that arena's address range; otherwise a complaint is
 * printed and nothing changes.
 * The arena named by as receives as->stats; arenas before it whose
 * disk stats differ from their memory stats receive their memory
 * stats.  Each updated arena has its trailer written with wbarena,
 * and one whose sealed flag changed is handed to sealarena unless it
 * is already queued.
 */
void
setatailstate(AState *as)
{
int i, j, osealed;
Arena *a;
Index *ix;
trace(0, "setatailstate %s 0x%llux clumps %d", as->arena->name, as->aa, as->stats.clumps);
ix = mainindex;
/* find the index of as->arena */
for(i=0; i<ix->narenas; i++)
if(ix->arenas[i] == as->arena)
break;
if(i==ix->narenas || as->aa < ix->amap[i].start || as->aa >= ix->amap[i].stop || as->arena != ix->arenas[i]){
fprint(2, "funny settailstate 0x%llux\n", as->aa);
return;
}
/* walk backwards to the nearest earlier arena whose disk and memory stats agree */
for(j=i; --j>=0; ){
a = ix->arenas[j];
if(atailcmp(&a->diskstats, &a->memstats) == 0)
break;
}
/* update every arena after that one, up to and including arena i */
for(j++; j<=i; j++){
a = ix->arenas[j];
qlock(&a->lock);
osealed = a->diskstats.sealed;
if(j == i)
a->diskstats = as->stats;
else
a->diskstats = a->memstats;
/* NOTE(review): the result of wbarena is not checked */
wbarena(a);
if(a->diskstats.sealed != osealed && !a->inqueue)
sealarena(a);
qunlock(&a->lock);
}
}
/*
 * once sealed, an arena never has any data added to it.
 * it should only be changed to fix errors.
 * mark the arena as queued and hand it to the background
 * checksummer.
 */
static void
sealarena(Arena *arena)
{
	arena->inqueue = 1;
	backsumarena(arena);
}
/*
 * Queue an arena for checksumming by sumproc and wake it up.
 * Does nothing if the checksum machinery was never initialised
 * (sumwait.l still nil) or the queue entry cannot be allocated.
 * New entries go on the front of the queue.
 */
void
backsumarena(Arena *arena)
{
	ASum *req;

	if(sumwait.l == nil)
		return;

	req = MK(ASum);
	if(req == nil)
		return;
	req->arena = arena;

	qlock(&sumlock);
	req->next = sumq;
	sumq = req;
	rwakeup(&sumwait);
	qunlock(&sumlock);
}
/*
 * Body of the background checksum proc: forever take the first
 * entry off sumq, sleeping on sumwait while the queue is empty, and
 * checksum its arena with the queue lock released.
 */
static void
sumproc(void *unused)
{
	ASum *req;
	Arena *target;

	USED(unused);

	for(;;){
		qlock(&sumlock);
		while(sumq == nil)
			rsleep(&sumwait);
		req = sumq;
		sumq = req->next;
		qunlock(&sumlock);

		target = req->arena;
		free(req);
		sumarena(target);
	}
}
/*
 * Compute the SHA1 checksum of an arena - from its header block
 * through its trailer block, with the trailer's final VtScoreSize
 * bytes taken as the zero score - and store it in those final bytes
 * on disk if what is there differs.  The score is also recorded in
 * arena->score.  On a read error the problem is logged and nothing
 * is updated.
 */
void
sumarena(Arena *arena)
{
ZBlock *b;
DigestState s;
u64int a, e;
u32int bs;
u8int score[VtScoreSize];
/* read in units of MaxIoSize, or one arena block if that is larger */
bs = MaxIoSize;
if(bs < arena->blocksize)
bs = arena->blocksize;
/*
 * read & sum all blocks except the last one
 */
memset(&s, 0, sizeof s);
/* NOTE(review): the result of alloczblock is not checked for nil here */
b = alloczblock(bs, 0, arena->part->blocksize);
/* e is the address of the trailer block */
e = arena->base + arena->size;
for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a += bs){
/* pause between reads; the delay is the global arenasumsleeptime */
sleep(arenasumsleeptime);
/* near the end, drop to single-block reads so the trailer is not read here */
if(a + bs > e)
bs = arena->blocksize;
if(readpart(arena->part, a, b->data, bs) < 0)
goto ReadErr;
addstat(StatSumRead, 1);
addstat(StatSumReadBytes, bs);
sha1(b->data, bs, nil, &s);
}
/*
 * the last one is special, since it may already have the checksum included
 */
bs = arena->blocksize;
if(readpart(arena->part, e, b->data, bs) < 0){
/* also the target of the goto in the loop above; a holds the loop's current address */
ReadErr:
logerr(EOk, "sumarena can't sum %s, read at %lld failed: %r", arena->name, a);
freezblock(b);
return;
}
addstat(StatSumRead, 1);
addstat(StatSumReadBytes, bs);
/* sum the trailer with the zero score standing in for its stored checksum */
sha1(b->data, bs-VtScoreSize, nil, &s);
sha1(zeroscore, VtScoreSize, nil, &s);
sha1(nil, 0, score, &s);
/*
 * check for no checksum or the same
 *
 * the writepart is okay because we flushed the dcache in sealarena
 * NOTE(review): no flush is visible in sealarena in this file - confirm
 */
if(scorecmp(score, &b->data[bs - VtScoreSize]) != 0){
/* a stored checksum that is neither zero nor correct is reported, then overwritten */
if(scorecmp(zeroscore, &b->data[bs - VtScoreSize]) != 0)
logerr(EOk, "overwriting mismatched checksums for arena=%s, found=%V calculated=%V",
arena->name, &b->data[bs - VtScoreSize], score);
scorecp(&b->data[bs - VtScoreSize], score);
if(writepart(arena->part, e, b->data, bs) < 0)
logerr(EOk, "sumarena can't write sum for %s: %r", arena->name);
}
freezblock(b);
qlock(&arena->lock);
scorecp(arena->score, score);
qunlock(&arena->lock);
}
/*
 * write the arena trailer block through the disk block cache.
 * the block is marked dirty before the arena is checked and packed
 * into it.  returns 0 on success, -1 if the block cannot be
 * obtained, the arena fails okarena, or packing fails.
 */
int
wbarena(Arena *arena)
{
	DBlock *trailer;
	int failed;

	trailer = getdblock(arena->part, arena->base + arena->size, OWRITE);
	if(trailer == nil){
		logerr(EAdmin, "can't write arena trailer: %r");
		return -1;
	}
	dirtydblock(trailer, DirtyArenaTrailer);
	failed = okarena(arena)<0 || packarena(arena, trailer->data)<0;
	putdblock(trailer);
	return failed ? -1 : 0;
}
/*
 * write the arena header block, the block just before arena->base,
 * straight to the partition.  the header is built from the arena's
 * name, version, block size and clump magic, and its size field
 * covers the arena plus the header and trailer blocks.
 * returns 0 on success, -1 on allocation, packing or write failure.
 */
int
wbarenahead(Arena *arena)
{
	ArenaHead hdr;
	ZBlock *zb;
	int failed;

	namecp(hdr.name, arena->name);
	hdr.version = arena->version;
	hdr.size = arena->size + 2 * arena->blocksize;
	hdr.blocksize = arena->blocksize;
	hdr.clumpmagic = arena->clumpmagic;

	zb = alloczblock(arena->blocksize, 1, arena->part->blocksize);
	if(zb == nil){
		logerr(EAdmin, "can't write arena header: %r");
		return -1;
	}

	/*
	 * writing directly with writepart is okay because this only
	 * happens during initialization.
	 */
	failed = packarenahead(&hdr, zb->data)<0 ||
		writepart(arena->part, arena->base - arena->blocksize, zb->data, arena->blocksize)<0;
	freezblock(zb);
	return failed ? -1 : 0;
}
/*
 * read the arena header and trailer blocks from disk.
 *
 * the trailer is authoritative: it is unpacked into arena, its
 * version must be 4 or 5, and its last VtScoreSize bytes become
 * arena->score.  any failure there returns -1.
 * the header is only cross-checked against the trailer: a header
 * that cannot be read or unpacked, or that disagrees, is logged
 * and the arena is still accepted (returns 0).
 */
static int
loadarena(Arena *arena)
{
	ArenaHead head;
	ZBlock *b;

	b = alloczblock(arena->blocksize, 0, arena->part->blocksize);
	if(b == nil)
		return -1;
	if(readpart(arena->part, arena->base + arena->size, b->data, arena->blocksize) < 0){
		freezblock(b);
		return -1;
	}
	if(unpackarena(arena, b->data) < 0){
		freezblock(b);
		return -1;
	}
	if(arena->version != ArenaVersion4 && arena->version != ArenaVersion5){
		seterr(EAdmin, "unknown arena version %d", arena->version);
		freezblock(b);
		return -1;
	}
	scorecp(arena->score, &b->data[arena->blocksize - VtScoreSize]);

	/* deliberately best-effort from here on: header trouble is logged, not fatal */
	if(readpart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize) < 0){
		logerr(EAdmin, "can't read arena header: %r");
		freezblock(b);
		return 0;
	}
	if(unpackarenahead(&head, b->data) < 0)
		logerr(ECorrupt, "corrupted arena header: %r");
	else if(namecmp(arena->name, head.name)!=0
	     || arena->clumpmagic != head.clumpmagic
	     || arena->version != head.version
	     || arena->blocksize != head.blocksize
	     || arena->size + 2 * arena->blocksize != head.size){
		/* report the first field that differs */
		if(namecmp(arena->name, head.name)!=0)
			logerr(ECorrupt, "arena tail name %s head %s",
				arena->name, head.name);
		else if(arena->clumpmagic != head.clumpmagic)
			logerr(ECorrupt, "arena tail clumpmagic 0x%lux head 0x%lux",
				(ulong)arena->clumpmagic, (ulong)head.clumpmagic);
		else if(arena->version != head.version)
			logerr(ECorrupt, "arena tail version %d head version %d",
				arena->version, head.version);
		else if(arena->blocksize != head.blocksize)
			logerr(ECorrupt, "arena tail block size %d head %d",
				arena->blocksize, head.blocksize);
		else if(arena->size+2*arena->blocksize != head.size)
			/* sizes are printed at full 64-bit width, with casts that match the verbs */
			logerr(ECorrupt, "arena tail size %llud head %llud",
				(uvlong)(arena->size+2*arena->blocksize), (uvlong)head.size);
		else
			logerr(ECorrupt, "arena header inconsistent with arena data");
	}
	freezblock(b);

	return 0;
}
/*
 * Sanity-check an arena's on-disk statistics.
 * Only one problem is fatal: used space plus the clump directory
 * exceeding the arena size, which sets the error string and makes
 * the result -1.  The remaining inconsistencies (compressed clump
 * count, uncompressed size, timestamps) are only logged.
 * Returns 0 if the arena is usable.
 */
static int
okarena(Arena *arena)
{
	u64int dirbytes;
	int status;

	status = 0;
	dirbytes = arenadirsize(arena, arena->diskstats.clumps);
	if(arena->diskstats.used + dirbytes > arena->size){
		seterr(ECorrupt, "arena used > size");
		status = -1;
	}

	if(arena->diskstats.cclumps > arena->diskstats.clumps)
		logerr(ECorrupt, "arena has more compressed clumps than total clumps");

	if(arena->diskstats.uncsize + arena->diskstats.clumps * ClumpSize + arena->blocksize < arena->diskstats.used)
		logerr(ECorrupt, "arena uncompressed size inconsistent with used space %lld %d %lld", arena->diskstats.uncsize, arena->diskstats.clumps, arena->diskstats.used);

	if(arena->ctime > arena->wtime)
		logerr(ECorrupt, "arena creation time after last write time");

	return status;
}
/*
 * Locate and fetch the directory block holding the entry for clump.
 * rock is caller-provided storage that is filled in and returned.
 * Directory blocks are counted backwards from the end of the arena:
 * block 0 is the last arena block, block 1 the one before it, and
 * so on.
 * When writing, the block is opened OWRITE only if the entry is the
 * first in its block and is the newest clump; otherwise ORDWR.
 * Reads use OREAD.
 * Returns nil if clump is out of range or the block is unavailable.
 */
static CIBlock*
getcib(Arena *arena, int clump, int writing, CIBlock *rock)
{
	int mode;
	u32int block, off;

	if(clump >= arena->memstats.clumps){
		seterr(EOk, "clump directory access out of range");
		return nil;
	}

	block = clump / arena->clumpmax;
	off = (clump - block * arena->clumpmax) * ClumpInfoSize;
	rock->block = block;
	rock->offset = off;

	mode = OREAD;
	if(writing){
		mode = ORDWR;
		if(off == 0 && clump == arena->memstats.clumps-1)
			mode = OWRITE;
	}

	rock->data = getdblock(arena->part,
		arena->base + arena->size - (block + 1) * arena->blocksize, mode);
	if(rock->data == nil)
		return nil;
	return rock;
}
/*
 * Release the directory block obtained with getcib and clear the
 * reference to it.  The arena argument is not used.
 */
static void
putcib(Arena *arena, CIBlock *cib)
{
	USED(arena);
	putdblock(cib->data);
	cib->data = nil;
}

414
src/cmd/venti/srv/arenas.c Normal file
View File

@ -0,0 +1,414 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
typedef struct AHash AHash;
/*
* hash table for finding arena's based on their names.
*/
struct AHash
{
AHash *next;
Arena *arena;
};
enum
{
AHashSize = 512
};
static AHash *ahash[AHashSize];
/*
 * Hash a NUL-terminated string to 32 bits.  Each character
 * contributes twice: a shifted and xor-folded copy is added to the
 * hash, then a sum of shifted copies of the raw character is xored in.
 */
static u32int
hashstr(char *s)
{
	u32int h;
	int raw, mixed;

	h = 0;
	while((raw = *s++) != 0){
		mixed = raw ^ (raw << 6);
		h += (mixed << 11) ^ (mixed >> 1);
		h ^= (raw << 14) + (raw << 7) + (raw << 4) + raw;
	}
	return h;
}
/*
 * Register arena in the name hash table.
 * The new node is pushed on the front of its bucket chain.
 * Returns 0 on success, -1 if the node cannot be allocated.
 */
int
addarena(Arena *arena)
{
	AHash *node, **bucket;

	bucket = &ahash[hashstr(arena->name) & (AHashSize - 1)];
	node = MK(AHash);
	if(node == nil)
		return -1;
	node->arena = arena;
	node->next = *bucket;
	*bucket = node;
	return 0;
}
/*
 * Look up a registered arena by name.
 * Returns the arena, or nil if no arena with that name is in the table.
 */
Arena*
findarena(char *name)
{
	AHash *node;

	node = ahash[hashstr(name) & (AHashSize - 1)];
	while(node != nil){
		if(strcmp(node->arena->name, name) == 0)
			return node->arena;
		node = node->next;
	}
	return nil;
}
/*
 * Remove arena from the name hash table.
 * The match is by pointer identity within the bucket selected by
 * the arena's name.  Returns 0 if a node was removed, -1 otherwise.
 */
int
delarena(Arena *arena)
{
	AHash *node, **link;

	link = &ahash[hashstr(arena->name) & (AHashSize - 1)];
	for(; (node = *link) != nil; link = &node->next){
		if(node->arena != arena)
			continue;
		/* unlink: works for both the bucket head and interior nodes */
		*link = node->next;
		free(node);
		return 0;
	}
	return -1;
}
/*
 * Read and validate the arena partition header and arena table on
 * part, initialize every arena listed in the table, and register
 * the arenas in the name hash.
 * Returns the new ArenaPart, or nil on failure.
 */
ArenaPart*
initarenapart(Part *part)
{
	AMapN amn;
	ArenaPart *ap;
	ZBlock *b;
	u32int i;
	int ok;

	b = alloczblock(HeadSize, 0, 0);
	if(b == nil || readpart(part, PartBlank, b->data, HeadSize) < 0){
		seterr(EAdmin, "can't read arena partition header: %r");
		/* the header block used to leak when the read failed */
		if(b != nil)
			freezblock(b);
		return nil;
	}

	ap = MKZ(ArenaPart);
	if(ap == nil){
		freezblock(b);
		return nil;
	}
	ap->part = part;
	ok = unpackarenapart(ap, b->data);
	freezblock(b);
	if(ok < 0){
		freearenapart(ap, 0);
		return nil;
	}

	ap->tabbase = (PartBlank + HeadSize + ap->blocksize - 1) & ~(ap->blocksize - 1);
	if(ap->version != ArenaPartVersion){
		seterr(ECorrupt, "unknown arena partition version %d", ap->version);
		freearenapart(ap, 0);
		return nil;
	}
	/* zero passes the x&(x-1) test but is not a usable block size */
	if(ap->blocksize == 0 || (ap->blocksize & (ap->blocksize - 1))){
		seterr(ECorrupt, "illegal non-power-of-2 block size %d\n", ap->blocksize);
		freearenapart(ap, 0);
		return nil;
	}
	if(ap->tabbase >= ap->arenabase){
		seterr(ECorrupt, "arena partition table overlaps with arena storage");
		freearenapart(ap, 0);
		return nil;
	}
	ap->tabsize = ap->arenabase - ap->tabbase;
	partblocksize(part, ap->blocksize);
	ap->size = ap->part->size & ~(u64int)(ap->blocksize - 1);

	if(readarenamap(&amn, part, ap->tabbase, ap->tabsize) < 0){
		freearenapart(ap, 0);
		return nil;
	}
	ap->narenas = amn.n;
	ap->map = amn.map;
	if(okamap(ap->map, ap->narenas, ap->arenabase, ap->size, "arena table") < 0){
		freearenapart(ap, 0);
		return nil;
	}

	ap->arenas = MKNZ(Arena*, ap->narenas);
	for(i = 0; i < ap->narenas; i++){
		ap->arenas[i] = initarena(part, ap->map[i].start, ap->map[i].stop - ap->map[i].start, ap->blocksize);
		if(ap->arenas[i] == nil){
			seterr(ECorrupt, "%s: %r", ap->map[i].name);
			freearenapart(ap, 1);
			return nil;
		}
		if(namecmp(ap->map[i].name, ap->arenas[i]->name) != 0){
			seterr(ECorrupt, "arena name mismatches with expected name: %s vs. %s",
				ap->map[i].name, ap->arenas[i]->name);
			freearenapart(ap, 1);
			return nil;
		}
		/* only arenas registered before this call are seen here */
		if(findarena(ap->arenas[i]->name)){
			seterr(ECorrupt, "duplicate arena name %s in %s",
				ap->map[i].name, ap->part->name);
			freearenapart(ap, 1);
			return nil;
		}
	}

	/* register only after every arena has been validated */
	for(i = 0; i < ap->narenas; i++)
		addarena(ap->arenas[i]);

	return ap;
}
/*
 * Create a fresh, empty arena partition on part with the given
 * block size and at least tabsize bytes reserved for the arena
 * table, and write its header and table to disk.
 * blocksize must be a non-zero power of two.
 * Returns the new ArenaPart, or nil on failure.
 */
ArenaPart*
newarenapart(Part *part, u32int blocksize, u32int tabsize)
{
	ArenaPart *ap;

	/* zero passes the x&(x-1) test but would make every rounded offset 0 */
	if(blocksize == 0 || (blocksize & (blocksize - 1))){
		seterr(ECorrupt, "illegal non-power-of-2 block size %d\n", blocksize);
		return nil;
	}
	ap = MKZ(ArenaPart);
	if(ap == nil)
		return nil;

	ap->version = ArenaPartVersion;
	ap->part = part;
	ap->blocksize = blocksize;
	partblocksize(part, blocksize);
	ap->size = part->size & ~(u64int)(blocksize - 1);
	/* table and arena storage both start on block boundaries */
	ap->tabbase = (PartBlank + HeadSize + blocksize - 1) & ~(blocksize - 1);
	ap->arenabase = (ap->tabbase + tabsize + blocksize - 1) & ~(blocksize - 1);
	ap->tabsize = ap->arenabase - ap->tabbase;
	ap->narenas = 0;

	if(wbarenapart(ap) < 0){
		freearenapart(ap, 0);
		return nil;
	}
	return ap;
}
/*
 * Write the arena partition header, followed by the arena table,
 * back to disk.  The table is validated with okamap first.
 * Returns 0 on success, -1 on failure.
 */
int
wbarenapart(ArenaPart *ap)
{
	ZBlock *hdr;
	int failed;

	if(okamap(ap->map, ap->narenas, ap->arenabase, ap->size, "arena table") < 0)
		return -1;

	hdr = alloczblock(HeadSize, 1, 0);
	if(hdr == nil)
		return -1;	/* ZZZ set error message? */

	failed = 0;
	if(packarenapart(ap, hdr->data) < 0){
		seterr(ECorrupt, "can't make arena partition header: %r");
		failed = 1;
	}else if(writepart(ap->part, PartBlank, hdr->data, HeadSize) < 0){
		seterr(EAdmin, "can't write arena partition header: %r");
		failed = 1;
	}
	freezblock(hdr);
	if(failed)
		return -1;

	return wbarenamap(ap->map, ap->narenas, ap->part, ap->tabbase, ap->tabsize);
}
/*
 * Free an arena partition descriptor, its map and its arena array.
 * If freearenas is set, every non-nil arena is also removed from
 * the name hash and freed.  A nil ap is ignored.
 */
void
freearenapart(ArenaPart *ap, int freearenas)
{
	int i;

	if(ap == nil)
		return;

	for(i = 0; freearenas && i < ap->narenas; i++){
		if(ap->arenas[i] != nil){
			delarena(ap->arenas[i]);
			freearena(ap->arenas[i]);
		}
	}
	free(ap->map);
	free(ap->arenas);
	free(ap);
}
/*
 * Validate an address map: entries must be in ascending order,
 * must not overlap, must not have stop before start, and must all
 * lie within [start, stop].  what names the map in error messages.
 * Returns 0 if the map is acceptable, -1 otherwise.
 */
int
okamap(AMap *am, int n, u64int start, u64int stop, char *what)
{
	u64int prev;
	u32int i;

	prev = start;
	for(i = 0; i < n; i++){
		if(am[i].start < prev){
			/* the first entry can only collide with the lower bound */
			if(i != 0)
				seterr(ECorrupt, "overlapping ranges in %s", what);
			else
				seterr(ECorrupt, "invalid start address in %s", what);
			return -1;
		}
		if(am[i].stop < am[i].start){
			seterr(ECorrupt, "invalid range in %s", what);
			return -1;
		}
		prev = am[i].stop;
	}

	if(prev <= stop)
		return 0;
	seterr(ECorrupt, "invalid ending address in %s", what);
	return -1;
}
/*
 * Resolve each of the n map entries to a registered arena by name,
 * storing the results in arenas.  what names the map's owner in
 * the error message.
 * Returns 0 on success, -1 at the first name that cannot be found.
 */
int
maparenas(AMap *am, Arena **arenas, int n, char *what)
{
	u32int i;
	Arena *found;

	for(i = 0; i < n; i++){
		found = findarena(am[i].name);
		arenas[i] = found;
		if(found != nil)
			continue;
		seterr(EAdmin, "can't find arena '%s' for '%s'\n", am[i].name, what);
		return -1;
	}
	return 0;
}
/*
 * Read and parse the arena map stored in size bytes at offset base
 * of part.  On success the parsed map is left in amn.
 * Returns 0 on success, -1 on failure.
 */
int
readarenamap(AMapN *amn, Part *part, u64int base, u32int size)
{
	IFile f;
	int ok;	/* signed: carries parseamap's -1 unchanged */

	if(partifile(&f, part, base, size) < 0)
		return -1;
	ok = parseamap(&f, amn);
	freeifile(&f);
	return ok;
}
/*
 * Format the n-entry map am as text and write it into the
 * size-byte region at offset base of part.
 * Returns 0 on success, -1 on failure.
 */
int
wbarenamap(AMap *am, int n, Part *part, u64int base, u64int size)
{
	Fmt fmt;
	ZBlock *blk;
	int status;

	blk = alloczblock(size, 1, part->blocksize);
	if(blk == nil)
		return -1;

	status = 0;
	fmtzbinit(&fmt, blk);
	if(outputamap(&fmt, am, n) < 0){
		seterr(ECorrupt, "arena set size too small");
		status = -1;
	}else if(writepart(part, base, blk->data, size) < 0){
		seterr(EAdmin, "can't write arena set: %r");
		status = -1;
	}
	freezblock(blk);
	return status;
}
/*
 * amap: n '\n' amapelem * n
 * n: u32int
 * amapelem: name '\t' astart '\t' astop '\n'
 * astart, astop: u64int
 *
 * Parse an address map from f into a newly allocated array that is
 * handed to the caller through amn.
 * Returns 0 on success; on failure returns -1 and frees the
 * partially built array.
 */
int
parseamap(IFile *f, AMapN *amn)
{
	AMap *am;
	u64int v64;
	u32int v;
	char *s, *t, *flds[4];
	int i, n;

	/*
	 * arenas
	 */
	if(ifileu32int(f, &v) < 0){
		seterr(ECorrupt, "syntax error: bad number of elements in %s", f->name);
		return -1;
	}
	/* range-check the unsigned count before it is narrowed to int */
	if(v > MaxAMap){
		seterr(ECorrupt, "illegal number of elements in %s", f->name);
		return -1;
	}
	n = v;
	am = MKNZ(AMap, n);
	if(am == nil){
		fprint(2, "out of memory\n");
		return -1;
	}
	for(i = 0; i < n; i++){
		s = ifileline(f);
		/* getfields splits s in place, so keep a copy for the diagnostic */
		if(s)
			t = estrdup(s);
		else
			t = nil;
		if(s == nil || getfields(s, flds, 4, 0, "\t") != 3){
			fprint(2, "early eof after %d of %d, %s:#%d: %s\n", i, n, f->name, f->pos, t);
			free(t);
			free(am);
			return -1;
		}
		free(t);
		if(nameok(flds[0]) < 0){
			free(am);
			return -1;
		}
		namecp(am[i].name, flds[0]);
		if(stru64int(flds[1], &v64) < 0){
			seterr(ECorrupt, "syntax error: bad arena base address in %s", f->name);
			free(am);
			return -1;
		}
		am[i].start = v64;
		if(stru64int(flds[2], &v64) < 0){
			seterr(ECorrupt, "syntax error: bad arena size in %s", f->name);
			free(am);
			return -1;
		}
		am[i].stop = v64;
	}

	amn->map = am;
	amn->n = n;
	return 0;
}
/*
 * Print an address map in the text form read by parseamap:
 * the entry count on its own line, then one
 * name<TAB>start<TAB>stop line per entry.
 * Returns 0 on success, -1 if any fmtprint call fails.
 */
int
outputamap(Fmt *f, AMap *am, int n)
{
	AMap *m, *end;

	if(fmtprint(f, "%ud\n", n) < 0)
		return -1;
	end = am + n;
	for(m = am; m < end; m++){
		if(fmtprint(f, "%s\t%llud\t%llud\n", m->name, m->start, m->stop) < 0)
			return -1;
	}
	return 0;
}

210
src/cmd/venti/srv/bloom.c Normal file
View File

@ -0,0 +1,210 @@
/*
* Bloom filter tracking which scores are present in our arenas
* and (more importantly) which are not.
*/
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/*
 * Initialize the Bloom filter descriptor b.
 * vsize is the filter size; it is rejected if it does not fit in a
 * ulong.  If data is non-nil the header fields are unpacked from it
 * with unpackbloomhead, and data becomes the filter's buffer.
 * Returns 0 on success, -1 on error.
 */
int
bloominit(Bloom *b, vlong vsize, u8int *data)
{
ulong size;
size = vsize;
if(size != vsize){ /* truncation */
werrstr("bloom data too big");
return -1;
}
b->size = size;
b->nhash = 32; /* will be fixed by caller on initialization */
if(data != nil)
if(unpackbloomhead(b, data) < 0)
return -1;
fprint(2, "bloom size %lud nhash %d\n", b->size, b->nhash);
/* NOTE(review): the mask is only meaningful if size is a power of two -- confirm where that is enforced */
b->mask = b->size-1;
b->data = data;
return 0;
}
/*
 * Pack the filter's header fields into the start of its own data buffer.
 */
void
wbbloomhead(Bloom *b)
{
packbloomhead(b, b->data);
}
/*
 * Load the Bloom filter stored on partition p.
 * The first 512 bytes are read to unpack the header, then the whole
 * filter (b->size bytes from offset 0) is read into a new buffer.
 * The ones/bits statistics are updated from the loaded data.
 * Returns the filter, or nil on failure.
 */
Bloom*
readbloom(Part *p)
{
	int i, n;
	uint ones;
	uchar buf[512];
	uchar *data;
	u32int *a;
	Bloom *b;

	b = vtmallocz(sizeof *b);
	if(readpart(p, 0, buf, sizeof buf) < 0){
		vtfree(b);	/* the descriptor used to leak on this path */
		return nil;
	}
	fprint(2, "header %.16H\n", buf);
	/* size 0 is a placeholder; the real size comes from the header */
	if(bloominit(b, 0, buf) < 0){
		vtfree(b);
		return nil;
	}
	data = vtmallocz(b->size);
	if(readpart(p, 0, data, b->size) < 0){
		vtfree(b);
		vtfree(data);
		return nil;
	}
	b->data = data;
	b->part = p;

	/* count the bits already set so the statistics start out right */
	a = (u32int*)b->data;
	n = b->size/4;
	ones = 0;
	for(i=0; i<n; i++)
		ones += countbits(a[i]);
	addstat(StatBloomOnes, ones);

	if(b->size == MaxBloomSize)	/* 2^32 overflows ulong */
		addstat(StatBloomBits, b->size*8-1);
	else
		addstat(StatBloomBits, b->size*8);

	return b;
}
/*
 * Refresh the packed header and write the whole filter
 * (b->size bytes) to offset 0 of its partition.
 * Returns the result of writepart.
 */
int
writebloom(Bloom *b)
{
wbbloomhead(b);
return writepart(b->part, 0, b->data, b->size);
}
/*
 * Derive two random 32-bit quantities a, b from the score
 * and then use a+b*i as a sequence of bloom filter indices.
 * Michael Mitzenmacher has a recent (2005) paper saying this is okay.
 * We reserve the bottom bytes (BloomHeadSize*8 bits) for the header.
 *
 * All BloomMaxHash entries of h are filled in.
 */
static void
gethashes(u8int *score, ulong *h)
{
int i;
u32int a, b;
a = 0;
b = 0;
/* fold the score, starting at byte 4, into a and b eight bytes at a time */
/* NOTE(review): the u32int loads assume score+4 is suitably aligned -- confirm for all callers */
for(i=4; i+8<=VtScoreSize; i+=8){
a ^= *(u32int*)(score+i);
b ^= *(u32int*)(score+i+4);
}
if(i+4 <= VtScoreSize) /* 20 is not 4-aligned */
a ^= *(u32int*)(score+i);
/* indices that would fall inside the header bits are clamped to the first bit after it */
for(i=0; i<BloomMaxHash; i++, a+=b)
h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a;
}
/*
 * Set the filter bits selected by score's first b->nhash hashes.
 * The count of newly set bits is added to StatBloomOnes.
 * Does no locking of its own.
 */
static void
_markbloomfilter(Bloom *b, u8int *score)
{
	int i, nnew;
	ulong h[BloomMaxHash];
	u32int x, *y, z, *tab;

	trace("markbloomfilter", "markbloomfilter %V", score);
	gethashes(score, h);
	nnew = 0;
	tab = (u32int*)b->data;
	for(i=0; i<b->nhash; i++){
		x = h[i];
		/* word index from the masked bit number, bit index from the low 5 bits */
		y = &tab[(x&b->mask)>>5];
		/* unsigned one: shifting a signed 1 into bit 31 is undefined */
		z = (u32int)1<<(x&31);
		if(!(*y&z)){
			nnew++;
			*y |= z;
		}
	}
	if(nnew)
		addstat(StatBloomOnes, nnew);

	trace("markbloomfilter", "markbloomfilter exit");
}
/*
 * Test whether every filter bit selected by score's first
 * b->nhash hashes is set.
 * Returns 1 if all are set, 0 as soon as one is clear.
 * Does no locking of its own.
 */
static int
_inbloomfilter(Bloom *b, u8int *score)
{
	int i;
	ulong h[BloomMaxHash], x;
	u32int *tab;

	gethashes(score, h);
	tab = (u32int*)b->data;
	for(i=0; i<b->nhash; i++){
		x = h[i];
		/* unsigned one: shifting a signed 1 into bit 31 is undefined */
		if(!(tab[(x&b->mask)>>5] & ((u32int)1<<(x&31))))
			return 0;
	}
	return 1;
}
/*
 * Locked, instrumented Bloom filter lookup.
 * Returns 1 if score may be present (always 1 when there is no
 * filter), 0 if the filter rules it out.
 * A 0 answer is counted as StatBloomHit, a 1 answer as StatBloomMiss.
 */
int
inbloomfilter(Bloom *b, u8int *score)
{
	int r;
	uint start, elapsed;

	if(b == nil)
		return 1;

	start = msec();
	rlock(&b->lk);
	r = _inbloomfilter(b, score);
	runlock(&b->lk);
	/* end minus start; the operands used to be reversed */
	elapsed = msec() - start;
	addstat2(StatBloomLookup, 1, StatBloomLookupTime, elapsed);

	if(r)
		addstat(StatBloomMiss, 1);
	else
		addstat(StatBloomHit, 1);
	return r;
}
/*
 * Locked wrapper around _markbloomfilter; does nothing when b is nil.
 * Takes b->lk for reading and then b->mod, releasing them in the
 * reverse order.
 */
void
markbloomfilter(Bloom *b, u8int *score)
{
if(b == nil)
return;
rlock(&b->lk);
qlock(&b->mod);
_markbloomfilter(b, score);
qunlock(&b->mod);
runlock(&b->lk);
}
/*
 * Writer loop for the Bloom filter passed in v: wait for a message
 * on writechan, write the filter out, then signal writedonechan.
 * A failed write is reported on fd 2 and the loop continues.
 * Never returns.
 */
static void
bloomwriteproc(void *v)
{
Bloom *b;
b = v;
for(;;){
recv(b->writechan, 0);
if(writebloom(b) < 0)
fprint(2, "oops! writing bloom: %r\n");
send(b->writedonechan, 0);
}
}
/*
 * Create the request and completion channels for b and start
 * bloomwriteproc on it via vtproc.
 */
void
startbloomproc(Bloom *b)
{
b->writechan = chancreate(sizeof(void*), 0);
b->writedonechan = chancreate(sizeof(void*), 0);
vtproc(bloomwriteproc, b);
}

View File

@ -0,0 +1,132 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/*
 * An IEStream is a sorted list of index entries.
 * Entries are read from a partition through a buffer of
 * `size' bytes; pos and epos delimit the unread buffered data.
 */
struct IEStream
{
Part *part; /* partition the entries are read from */
u64int off; /* read position within part */
u64int n; /* number of valid ientries left to read */
u32int size; /* allocated space in buffer */
u8int *buf; /* start of the buffer */
u8int *pos; /* current place in buffer */
u8int *epos; /* end of valid buffer contents */
};
/*
 * Create a stream over `clumps' index entries stored at offset off
 * of part, read through a buffer of size bytes.
 * The buffer starts out empty.
 */
IEStream*
initiestream(Part *part, u64int off, u64int clumps, u32int size)
{
	IEStream *stream;

	/* ZZZ out of memory? */
	stream = MKZ(IEStream);
	stream->buf = MKN(u8int, size);
	stream->size = size;

	/* pos == epos == buf: nothing buffered yet */
	stream->pos = stream->buf;
	stream->epos = stream->buf;

	stream->part = part;
	stream->off = off;
	stream->n = clumps;
	return stream;
}
/*
 * Free a stream and its buffer; a nil stream is ignored.
 */
void
freeiestream(IEStream *ies)
{
if(ies == nil)
return;
free(ies->buf);
free(ies);
}
/*
 * Return the next IEntry (still packed) in the stream.
 * When less than one whole entry remains buffered, the leftover
 * bytes are moved to the front of the buffer and the buffer is
 * refilled from the partition.
 * Returns nil when nothing more can be read or when the read fails.
 * Does not advance the stream.
 */
static u8int*
peekientry(IEStream *ies)
{
u32int n, nn;
n = ies->epos - ies->pos;
if(n < IEntrySize){
/* keep the partial entry and restart at the front of the buffer */
memmove(ies->buf, ies->pos, n);
ies->epos = &ies->buf[n];
ies->pos = ies->buf;
/* fill to the smaller of the buffer size and the bytes still owed, less what is already held */
nn = ies->size;
if(nn > ies->n * IEntrySize)
nn = ies->n * IEntrySize;
nn -= n;
if(nn == 0)
return nil;
//fprint(2, "peek %d from %llud into %p\n", nn, ies->off, ies->epos);
if(readpart(ies->part, ies->off, ies->epos, nn) < 0){
seterr(EOk, "can't read sorted index entries: %r");
return nil;
}
ies->epos += nn;
ies->off += nn;
}
return ies->pos;
}
/*
 * Compute the bucket number for the given IEntry.
 * Knows that the score is the first thing in the packed
 * representation.
 * The bucket is the top 32 hash bits of the score divided by
 * ix->div; ib and ies are unused.
 */
static u32int
iebuck(Index *ix, u8int *b, IBucket *ib, IEStream *ies)
{
USED(ies);
USED(ib);
return hashbits(b, 32) / ix->div;
}
/*
 * Fill ib with the next bucket in the stream.
 * Entries are consumed while they map to the same bucket number.
 * maxdata is the space available in ib->data.
 * Returns the bucket number, or TWID32 when the stream is already
 * exhausted, when peekientry fails, or when the bucket would
 * overflow maxdata.
 */
u32int
buildbucket(Index *ix, IEStream *ies, IBucket *ib, uint maxdata)
{
IEntry ie1, ie2;
u8int *b;
u32int buck;
buck = TWID32;
ib->n = 0;
while(ies->n){
b = peekientry(ies);
if(b == nil)
return TWID32;
//fprint(2, "b=%p ies->n=%lld ib.n=%d buck=%d score=%V\n", b, ies->n, ib->n, iebuck(ix, b, ib, ies), b);
if(ib->n == 0)
buck = iebuck(ix, b, ib, ies);
else{
/* an entry for a different bucket ends this one; it stays in the stream */
if(buck != iebuck(ix, b, ib, ies))
break;
if(ientrycmp(&ib->data[(ib->n - 1)* IEntrySize], b) == 0){
/*
* guess that the larger address is the correct one to use
*/
unpackientry(&ie1, &ib->data[(ib->n - 1)* IEntrySize]);
unpackientry(&ie2, b);
seterr(EOk, "duplicate index entry for score=%V type=%d", ie1.score, ie1.ia.type);
/* drop the stored duplicate; the copy below re-adds whichever entry now sits in b */
ib->n--;
if(ie1.ia.addr > ie2.ia.addr)
memmove(b, &ib->data[ib->n * IEntrySize], IEntrySize);
}
}
if((ib->n+1)*IEntrySize > maxdata){
seterr(EOk, "bucket overflow");
return TWID32;
}
memmove(&ib->data[ib->n * IEntrySize], b, IEntrySize);
ib->n++;
ies->n--;
ies->pos += IEntrySize;
}
return buck;
}

View File

@ -0,0 +1,160 @@
/*
* Rebuild the Venti index from scratch.
*/
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/*
 * Write a single bucket. Could profit from a big buffer here
 * so that we can absorb sporadic runs of blocks into one write,
 * avoiding disk seeks.
 *
 * buck is the index-wide bucket number; it is converted to an
 * offset within the owning index section.  ib is packed into
 * b->data before one block is written.
 * Returns -1 if the section does not cover buck, otherwise the
 * result of writepart.
 */
static int
writebucket(Index *ix, u32int buck, IBucket *ib, ZBlock *b)
{
ISect *is;
is = ix->sects[indexsect0(ix, buck)];
if(buck < is->start || buck >= is->stop){
seterr(EAdmin, "cannot find index section for bucket %lud\n", (ulong)buck);
return -1;
}
/* make the bucket number relative to the start of its section */
buck -= is->start;
/*
qlock(&stats.lock);
stats.indexwrites++;
qunlock(&stats.lock);
*/
packibucket(ib, b->data, is->bucketmagic);
return writepart(is->part, is->blockbase + ((u64int)buck << is->blocklog), b->data, is->blocksize);
}
/*
 * Build the on-disk index from the sorted entry stream stored at
 * offset off of part (clumps entries).
 * If zero is set, every bucket that receives no entries is written
 * out as an empty bucket.
 * Returns 0 on success, -1 if any allocation or bucket write fails
 * or a bucket number is out of range.
 */
static int
buildindex(Index *ix, Part *part, u64int off, u64int clumps, int zero)
{
	IEStream *ies;
	IBucket ib, zib;
	ZBlock *z, *b;
	u32int next, buck;
	int ok;
	uint nbuck;
	u64int found = 0;

	/* ZZZ make buffer size configurable */
	b = alloczblock(ix->blocksize, 0, ix->blocksize);
	z = alloczblock(ix->blocksize, 1, ix->blocksize);
	ies = initiestream(part, off, clumps, 64*1024);
	if(b == nil || z == nil || ies == nil){
		/* this path used to report success */
		ok = -1;
		goto breakout;
	}
	ok = 0;
	next = 0;
	memset(&ib, 0, sizeof ib);
	/* zib is packed by writebucket, so no field may be left uninitialized */
	memset(&zib, 0, sizeof zib);
	ib.data = b->data + IBucketSize;
	zib.data = z->data + IBucketSize;
	zib.n = 0;
	nbuck = 0;
	for(;;){
		buck = buildbucket(ix, ies, &ib, ix->blocksize-IBucketSize);
		found += ib.n;
		if(zero){
			/* fill the gap before buck (or to the end) with empty buckets */
			for(; next != buck; next++){
				if(next == ix->buckets){
					if(buck != TWID32){
						fprint(2, "bucket out of range\n");
						ok = -1;
					}
					goto breakout;
				}
				if(writebucket(ix, next, &zib, z) < 0){
					fprint(2, "can't write zero bucket to buck=%d: %r\n", next);
					ok = -1;
				}
			}
		}
		if(buck >= ix->buckets){
			/* TWID32 is buildbucket's end-of-stream (or error) marker */
			if(buck == TWID32)
				break;
			fprint(2, "bucket out of range\n");
			ok = -1;
			goto breakout;
		}
		if(writebucket(ix, buck, &ib, b) < 0){
			fprint(2, "bad bucket found=%lld: %r\n", found);
			ok = -1;
		}
		next = buck + 1;
		if(++nbuck%10000 == 0)
			fprint(2, "\t%,d buckets written...\n", nbuck);
	}
breakout:;
	fprint(2, "wrote index with %lld entries\n", found);
	freeiestream(ies);
	freezblock(z);
	freezblock(b);
	return ok;
}
/*
 * Print the command synopsis and exit with a failure status.
 */
void
usage(void)
{
	fprint(2, "usage: buildindex [-Z] [-B blockcachesize] config tmppart\n");
	/* a non-nil exit string marks failure; 0 would report success */
	threadexitsall("usage");
}
Config conf;
/*
 * buildindex entry point.
 * -B sets the disk block cache size, -Z disables writing of empty
 * buckets.  Takes the venti configuration file and a temporary
 * partition used for the sorted entry list.
 */
void
threadmain(int argc, char *argv[])
{
Part *part;
u64int clumps, base;
u32int bcmem;
int zero;
zero = 1;
bcmem = 0;
ARGBEGIN{
case 'B':
bcmem = unittoull(ARGF());
break;
case 'Z':
zero = 0;
break;
default:
usage();
break;
}ARGEND
if(argc != 2)
usage();
if(initventi(argv[0], &conf) < 0)
sysfatal("can't init venti: %r");
/* enforce a minimum cache size derived from the number of arenas and index sections */
if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
initdcache(bcmem);
fprint(2, "building a new index %s using %s for temporary storage\n", mainindex->name, argv[1]);
part = initpart(argv[1], ORDWR|ODIRECT);
if(part == nil)
sysfatal("can't initialize temporary partition: %r");
clumps = sortrawientries(mainindex, part, &base, mainindex->bloom);
if(clumps == TWID64)
sysfatal("can't build sorted index: %r");
fprint(2, "found and sorted index entries for clumps=%lld at %lld\n", clumps, base);
if(buildindex(mainindex, part, base, clumps, zero) < 0)
sysfatal("can't build new index: %r");
threadexitsall(0);
}

View File

@ -0,0 +1,135 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
static int verbose;
/*
 * Check one arena by running syncarena over it until it no longer
 * reports SyncHeader, then compare the resulting in-memory
 * statistics with those held on entry.
 * If scan is set the statistics are zeroed first.
 * If fix is set and any problem was seen, the corrected header is
 * written back.
 */
static void
checkarena(Arena *arena, int scan, int fix)
{
ATailStats old;
int err, e;
if(verbose && arena->memstats.clumps)
printarena(2, arena);
old = arena->memstats;
if(scan){
arena->memstats.used = 0;
arena->memstats.clumps = 0;
arena->memstats.cclumps = 0;
arena->memstats.uncsize = 0;
}
err = 0;
for(;;){
e = syncarena(arena, 0, 1000, 0, fix);
err |= e;
if(!(e & SyncHeader))
break;
if(verbose && arena->memstats.clumps)
fprint(2, ".");
}
if(verbose && arena->memstats.clumps)
fprint(2, "\n");
/* SyncHeader is re-derived below from the before/after comparison */
err &= ~SyncHeader;
if(arena->memstats.used != old.used
|| arena->memstats.clumps != old.clumps
|| arena->memstats.cclumps != old.cclumps
|| arena->memstats.uncsize != old.uncsize){
fprint(2, "%s: incorrect arena header fields\n", arena->name);
printarena(2, arena);
err |= SyncHeader;
}
if(!err || !fix)
return;
fprint(2, "%s: writing fixed arena header fields\n", arena->name);
arena->diskstats = arena->memstats;
if(wbarena(arena) < 0)
fprint(2, "arena header write failed: %r\n");
flushdcache();
}
/*
 * Print the command synopsis and exit with a failure status.
 */
void
usage(void)
{
	fprint(2, "usage: checkarenas [-afv] file [arenaname...]\n");
	/* a non-nil exit string marks failure; 0 would report success */
	threadexitsall("usage");
}
/*
 * Decide whether the arena called name was selected.
 * An empty argument list selects everything; otherwise name must
 * equal one of the argc strings in argv.
 * Returns 1 if selected, 0 if not.
 */
int
should(char *name, int argc, char **argv)
{
	if(argc == 0)
		return 1;
	for(; argc > 0; argc--, argv++){
		if(strcmp(name, *argv) == 0)
			return 1;
	}
	return 0;
}
/*
 * checkarenas entry point.
 * -f enables fixing, -a selects a full scan, -v increases verbosity.
 * Checks every arena in the given partition file, or only the
 * arenas named by the remaining arguments.
 */
void
threadmain(int argc, char *argv[])
{
ArenaPart *ap;
Part *part;
char *file;
int i, fix, scan;
ventifmtinstall();
statsinit();
fix = 0;
scan = 0;
ARGBEGIN{
case 'f':
fix++;
break;
case 'a':
scan = 1;
break;
case 'v':
verbose++;
break;
default:
usage();
break;
}ARGEND
if(!fix)
readonly = 1;
if(argc < 1)
usage();
file = argv[0];
part = initpart(file, ORDWR|ODIRECT);
if(part == nil)
sysfatal("can't open partition %s: %r", file);
ap = initarenapart(part);
if(ap == nil)
sysfatal("can't initialize arena partition in %s: %r", file);
if(verbose > 1){
printarenapart(2, ap);
fprint(2, "\n");
}
initdcache(8 * MaxDiskBlock);
/* argv still begins with the file name; should() is given the whole list */
for(i = 0; i < ap->narenas; i++)
if(should(ap->arenas[i]->name, argc, argv))
checkarena(ap->arenas[i], scan, fix);
if(verbose > 1)
printstats();
threadexitsall(0);
}

View File

@ -0,0 +1,293 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/* running totals of index entry discrepancies, reported by checkindex */
static int extra, missing, wrong;
/*
 * Print the heading for the differences found in disk block eb.
 * The column legend is printed once, before the first heading.
 */
static void
phdr(DBlock *eb)
{
static int did;
if(!did){
did = 1;
print("# diff actual correct\n");
}
print("%s block 0x%llux\n", eb->part->name, eb->addr);
}
/*
 * Print one index entry as a diff line.
 * c is the marker character; callers pass '<' for an entry taken
 * from the on-disk bucket and '>' for one from the rebuilt bucket.
 */
static void
pie(IEntry *ie, char c)
{
print("%c %V %22lld %3d %5d %3d\n",
c, ie->score, ie->ia.addr, ie->ia.type, ie->ia.size, ie->ia.blocks);
}
/*
 * Compare the rebuilt bucket ib against bucket buck as stored in
 * the index, printing a diff of the entries that differ.
 * On-disk entries absent from ib count as extra, entries of ib
 * absent on disk as missing, and entries present in both with
 * different addresses as wrong.
 * Returns 0 if no extra or missing entries were found, -1 otherwise
 * or if the on-disk bucket cannot be read.
 */
static int
checkbucket(Index *ix, u32int buck, IBucket *ib)
{
	ISect *is;
	DBlock *eb;
	IBucket eib;
	IEntry ie, eie;
	int i, ei, ok, c, hdr;

	is = ix->sects[indexsect0(ix, buck)];
	if(buck < is->start || buck >= is->stop){
		seterr(EAdmin, "cannot find index section for bucket %lud\n", (ulong)buck);
		return -1;
	}
	buck -= is->start;
	eb = getdblock(is->part, is->blockbase + ((u64int)buck << is->blocklog), OREAD);
	if(eb == nil)
		return -1;
	unpackibucket(&eib, eb->data, is->bucketmagic);

	ok = 0;
	ei = 0;
	hdr = 0;
	/* walk both entry lists in step, in ientrycmp order */
	for(i = 0; i < ib->n; i++){
		while(ei < eib.n){
			c = ientrycmp(&ib->data[i * IEntrySize], &eib.data[ei * IEntrySize]);
			if(c == 0){
				unpackientry(&ie, &ib->data[i * IEntrySize]);
				unpackientry(&eie, &eib.data[ei * IEntrySize]);
				/* NOTE(review): a wrong address is counted and printed but does not make the result -1 -- confirm that is intended */
				if(iaddrcmp(&ie.ia, &eie.ia) != 0){
					if(!hdr){
						phdr(eb);
						hdr = 1;
					}
					wrong++;
					pie(&eie, '<');
					pie(&ie, '>');
				}
				ei++;
				goto cont;
			}
			if(c < 0)
				break;
			/* the on-disk entry sorts before ours: it should not be there */
			if(!hdr){
				phdr(eb);
				hdr = 1;
			}
			unpackientry(&eie, &eib.data[ei*IEntrySize]);
			extra++;
			pie(&eie, '<');
			ei++;
			ok = -1;
		}
		/* no on-disk entry matched: ours is missing from the index */
		if(!hdr){
			phdr(eb);
			hdr = 1;
		}
		unpackientry(&ie, &ib->data[i*IEntrySize]);
		missing++;
		pie(&ie, '>');
		ok = -1;
	cont:;
	}
	/* whatever is left on disk is spurious as well */
	for(; ei < eib.n; ei++){
		if(!hdr){
			phdr(eb);
			hdr = 1;
		}
		unpackientry(&eie, &eib.data[ei*IEntrySize]);
		/* these entries used to be printed without being counted */
		extra++;
		pie(&eie, '<');
		ok = -1;
	}
	putdblock(eb);
	return ok;
}
/*
 * Compare the on-disk index against the sorted entry stream stored
 * at offset off of part (clumps entries).
 * If zero is set, every bucket that receives no entries is checked
 * to be empty on disk.
 * Returns 0 if no discrepancy was found, -1 otherwise; on
 * discrepancies the error string carries the summary counts.
 */
int
checkindex(Index *ix, Part *part, u64int off, u64int clumps, int zero)
{
	IEStream *ies;
	IBucket ib, zib;
	ZBlock *z, *b;
	u32int next, buck;
	int ok, bok;
	u64int found = 0;

	/* ZZZ make buffer size configurable */
	b = alloczblock(ix->blocksize, 0, ix->blocksize);
	z = alloczblock(ix->blocksize, 1, ix->blocksize);
	ies = initiestream(part, off, clumps, 64*1024);
	if(b == nil || z == nil || ies == nil){
		werrstr("allocating: %r");
		ok = -1;
		goto breakout;
	}
	ok = 0;
	next = 0;
	memset(&ib, 0, sizeof ib);
	ib.data = b->data;
	zib.data = z->data;
	zib.n = 0;
	zib.buck = 0;
	for(;;){
		buck = buildbucket(ix, ies, &ib, ix->blocksize-IBucketSize);
		found += ib.n;
		if(zero){
			/* buckets in the gap before buck must be empty on disk */
			for(; next != buck; next++){
				if(next == ix->buckets){
					if(buck != TWID32){
						ok = -1;
						werrstr("internal error: bucket out of range");
					}
					if(ok < 0)
						werrstr("%d spurious entries, %d missing, %d wrong", extra, missing, wrong);
					goto breakout;
				}
				bok = checkbucket(ix, next, &zib);
				if(bok < 0)
					ok = -1;
			}
		}
		if(buck >= ix->buckets){
			if(buck == TWID32){
				/* without zero checking this is the normal exit; report the summary here too */
				if(ok < 0)
					werrstr("%d spurious entries, %d missing, %d wrong", extra, missing, wrong);
				break;
			}
			werrstr("internal error: bucket out of range");
			ok = -1;
			goto breakout;
		}
		bok = checkbucket(ix, buck, &ib);
		if(bok < 0)
			ok = -1;
		next = buck + 1;
	}
breakout:
	freeiestream(ies);
	freezblock(z);
	freezblock(b);
	return ok;
}
/*
 * Compare Bloom filter b1 against b2 word by word, skipping the
 * header.  Bits set only in b1 are counted as spurious, bits set
 * only in b2 as missing.
 * Without fix, only missing bits are an error.  With fix, any
 * difference causes b2's data to be copied into b1 and written out.
 * Returns 0 if acceptable, -1 on error, or the result of writebloom.
 */
int
checkbloom(Bloom *b1, Bloom *b2, int fix)
{
u32int *a1, *a2;
int i, n, extra, missing;
if(b1==nil && b2==nil)
return 0;
if(b1==nil || b2==nil){
werrstr("nil/non-nil");
return -1;
}
/* pack both headers into their buffers so they can be compared as bytes */
wbbloomhead(b1);
wbbloomhead(b2);
if(memcmp(b1->data, b2->data, BloomHeadSize) != 0){
werrstr("bloom header mismatch");
return -1;
}
a1 = (u32int*)b1->data;
a2 = (u32int*)b2->data;
n = b1->size/4;
extra = 0;
missing = 0;
for(i=BloomHeadSize/4; i<n; i++){
if(a1[i] != a2[i]){
print("%.8ux/%.8ux.", a1[i], a2[i]);
extra += countbits(a1[i] & ~a2[i]);
missing += countbits(a2[i] & ~a1[i]);
}
}
if(extra || missing)
fprint(2, "bloom filter: %d spurious bits, %d missing bits\n", extra, missing);
else
fprint(2, "bloom filter: correct\n");
if(!fix && missing){
werrstr("missing bits");
return -1;
}
if(fix && (missing || extra)){
memmove(b1->data, b2->data, b1->size);
return writebloom(b1);
}
return 0;
}
/*
 * Print the command synopsis and exit with a failure status.
 */
void
usage(void)
{
	/* -Z is accepted by threadmain, so it belongs in the synopsis */
	fprint(2, "usage: checkindex [-fZ] [-B blockcachesize] config tmp\n");
	/* a non-nil exit string marks failure; 0 would report success */
	threadexitsall("usage");
}
Config conf;
/*
 * checkindex entry point.
 * -B sets the disk block cache size, -f enables fixing, -Z skips
 * the check of empty buckets.  Takes the venti configuration file
 * and a temporary partition for the sorted entry list.
 */
void
threadmain(int argc, char *argv[])
{
Bloom *oldbloom, *newbloom;
Part *part;
u64int clumps, base;
u32int bcmem;
int fix, skipz, ok;
fix = 0;
bcmem = 0;
skipz = 0;
ARGBEGIN{
case 'B':
bcmem = unittoull(ARGF());
break;
case 'f':
fix++;
break;
case 'Z':
skipz = 1;
break;
default:
usage();
break;
}ARGEND
if(argc != 2)
usage();
ventifmtinstall();
part = initpart(argv[1], ORDWR|ODIRECT);
if(part == nil)
sysfatal("can't initialize temporary partition: %r");
if(!fix)
readonly = 1;
if(initventi(argv[0], &conf) < 0)
sysfatal("can't init venti: %r");
/* build a fresh filter of the same size to compare with the configured one */
oldbloom = mainindex->bloom;
newbloom = nil;
if(oldbloom){
newbloom = vtmallocz(sizeof *newbloom);
bloominit(newbloom, oldbloom->size, nil);
newbloom->data = vtmallocz(oldbloom->size);
}
/* enforce a minimum cache size derived from the number of arenas and index sections */
if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
initdcache(bcmem);
fprint(2, "checkindex: building entry list\n");
clumps = sortrawientries(mainindex, part, &base, newbloom);
if(clumps == TWID64)
sysfatal("can't build sorted index: %r");
fprint(2, "checkindex: checking %lld entries at %lld\n", clumps, base);
ok = 0;
if(checkindex(mainindex, part, base, clumps, !skipz) < 0){
fprint(2, "checkindex: %r\n");
ok = -1;
}
if(checkbloom(oldbloom, newbloom, fix) < 0){
fprint(2, "checkbloom: %r\n");
ok = -1;
}
if(ok < 0)
sysfatal("errors found");
fprint(2, "checkindex: index is correct\n");
threadexitsall(0);
}

222
src/cmd/venti/srv/clump.c Normal file
View File

@ -0,0 +1,222 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
#include "whack.h"
/*
 * Write a lump to disk. Updates ia with an index address
 * for the newly-written lump. Upon return, the lump will
 * have been placed in the disk cache but will likely not be on disk yet.
 *
 * zb holds the uncompressed data and sc its score.  The data is
 * stored compressed when whackblock makes it smaller, raw otherwise.
 * Returns 0 on success, -1 on failure.
 */
int
storeclump(Index *ix, ZBlock *zb, u8int *sc, int type, u32int creator, IAddr *ia)
{
ZBlock *cb;
Clump cl;
u64int a;
u8int bh[VtScoreSize];
int size, dsize;
trace(TraceLump, "storeclump enter", sc, type);
size = zb->len;
if(size > VtMaxLumpSize){
seterr(EStrange, "lump too large");
return -1;
}
if(vttypevalid(type) < 0){
seterr(EStrange, "invalid lump type");
return -1;
}
/* disabled check that the data really hashes to sc */
if(0){
scoremem(bh, zb->data, size);
if(scorecmp(sc, bh) != 0){
seterr(ECorrupt, "storing clump: corrupted; expected=%V got=%V, size=%d", sc, bh, size);
return -1;
}
}
/* room for the clump header, the data, and a trailing U32Size of zeros */
cb = alloczblock(size + ClumpSize + U32Size, 0, 0);
if(cb == nil)
return -1;
cl.info.type = type;
cl.info.uncsize = size;
cl.creator = creator;
cl.time = now();
scorecp(cl.info.score, sc);
trace(TraceLump, "storeclump whackblock");
dsize = whackblock(&cb->data[ClumpSize], zb->data, size);
if(dsize > 0 && dsize < size){
cl.encoding = ClumpECompress;
}else{
if(dsize > size){
fprint(2, "whack error: dsize=%d size=%d\n", dsize, size);
abort();
}
/* compression did not help: store the data as is */
cl.encoding = ClumpENone;
dsize = size;
memmove(&cb->data[ClumpSize], zb->data, size);
}
memset(cb->data+ClumpSize+dsize, 0, 4);
cl.info.size = dsize;
ia->addr = 0;
ia->type = type;
ia->size = size;
/* on-disk length of header plus data, rounded up to whole blocks of 1<<ABlockLog bytes */
ia->blocks = (dsize + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
a = writeiclump(ix, &cl, cb->data, &ia->addr);
trace(TraceLump, "storeclump exit %lld", a);
freezblock(cb);
if(a == TWID64)
return -1;
/*
qlock(&stats.lock);
stats.clumpwrites++;
stats.clumpbwrites += size;
stats.clumpbcomp += dsize;
qunlock(&stats.lock);
*/
return 0;
}
/*
 * Read and unpack the magic number stored at address aa of arena.
 * Returns TWID32 if the read is reported as failed.
 */
u32int
clumpmagic(Arena *arena, u64int aa)
{
u8int buf[U32Size];
/* NOTE(review): this test can only fire if readarena returns a signed type -- confirm against its declaration */
if(readarena(arena, aa, buf, U32Size) < 0)
return TWID32;
return unpackmagic(buf);
}
/*
 * fetch a block based at addr.
 * score is filled in with the block's score.
 * blocks is roughly the length of the clump on disk;
 * if zero, the length is unknown.
 *
 * cl receives the unpacked clump header.  If verify is set, the
 * score of the decoded data and the lump type are checked.
 * Returns the uncompressed data, or nil on failure.
 */
ZBlock*
loadclump(Arena *arena, u64int aa, int blocks, Clump *cl, u8int *score, int verify)
{
Unwhack uw;
ZBlock *zb, *cb;
u8int bh[VtScoreSize], *buf;
u32int n;
int nunc;
/*
qlock(&stats.lock);
stats.clumpreads++;
qunlock(&stats.lock);
*/
if(blocks <= 0)
blocks = 1;
trace(TraceLump, "loadclump enter");
cb = alloczblock(blocks << ABlockLog, 0, 0);
if(cb == nil)
return nil;
/* NOTE(review): n is unsigned, so a negative return from readarena would not be caught by the test below -- confirm readarena's failure value */
n = readarena(arena, aa, cb->data, blocks << ABlockLog);
if(n < ClumpSize){
if(n != 0)
seterr(ECorrupt, "loadclump read less than a header");
freezblock(cb);
return nil;
}
trace(TraceLump, "loadclump unpack");
if(unpackclump(cl, cb->data, arena->clumpmagic) < 0){
seterr(ECorrupt, "loadclump %s %llud: %r", arena->name, aa);
freezblock(cb);
return nil;
}
n -= ClumpSize;
/* the first read was too short for the data: read exactly the data part again */
if(n < cl->info.size){
freezblock(cb);
n = cl->info.size;
cb = alloczblock(n, 0, 0);
if(cb == nil)
return nil;
if(readarena(arena, aa + ClumpSize, cb->data, n) != n){
seterr(ECorrupt, "loadclump read too little data");
freezblock(cb);
return nil;
}
buf = cb->data;
}else
buf = cb->data + ClumpSize;
scorecp(score, cl->info.score);
zb = alloczblock(cl->info.uncsize, 0, 0);
if(zb == nil){
freezblock(cb);
return nil;
}
switch(cl->encoding){
case ClumpECompress:
trace(TraceLump, "loadclump decompress");
unwhackinit(&uw);
nunc = unwhack(&uw, zb->data, cl->info.uncsize, buf, cl->info.size);
if(nunc != cl->info.uncsize){
if(nunc < 0)
seterr(ECorrupt, "decompression of %llud failed: %s", aa, uw.err);
else
seterr(ECorrupt, "decompression of %llud gave partial block: %d/%d\n", aa, nunc, cl->info.uncsize);
freezblock(cb);
freezblock(zb);
return nil;
}
break;
case ClumpENone:
if(cl->info.size != cl->info.uncsize){
seterr(ECorrupt, "loading clump: bad uncompressed size for uncompressed block %llud", aa);
freezblock(cb);
freezblock(zb);
return nil;
}
/* a mismatch here is only reported; loading continues */
scoremem(bh, buf, cl->info.uncsize);
if(scorecmp(cl->info.score, bh) != 0)
seterr(ECorrupt, "pre-copy sha1 wrong at %s %llud: expected=%V got=%V", arena->name, aa, cl->info.score, bh);
memmove(zb->data, buf, cl->info.uncsize);
break;
default:
seterr(ECorrupt, "unknown encoding in loadlump %llud", aa);
freezblock(cb);
freezblock(zb);
return nil;
}
freezblock(cb);
if(verify){
trace(TraceLump, "loadclump verify");
scoremem(bh, zb->data, cl->info.uncsize);
if(scorecmp(cl->info.score, bh) != 0){
seterr(ECorrupt, "loading clump: corrupted at %s %llud; expected=%V got=%V", arena->name, aa, cl->info.score, bh);
freezblock(zb);
return nil;
}
if(vttypevalid(cl->info.type) < 0){
seterr(ECorrupt, "loading lump at %s %llud: invalid lump type %d", arena->name, aa, cl->info.type);
freezblock(zb);
return nil;
}
}
trace(TraceLump, "loadclump exit");
/*
qlock(&stats.lock);
stats.clumpbreads += cl->info.size;
stats.clumpbuncomp += cl->info.uncsize;
qunlock(&stats.lock);
*/
return zb;
}

View File

@ -0,0 +1,127 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/* histogram of clumps, indexed by uncompressed size and then by type */
int count[VtMaxLumpSize][VtMaxType];
Config conf;
enum
{
ClumpChunks = 32*1024	/* clump directory entries read per readclumpinfos call */
};
static int
readarenainfo(Arena *arena)
{
ClumpInfo *ci, *cis;
u32int clump;
int i, n, ok;
if(arena->memstats.clumps)
fprint(2, "reading directory for arena=%s with %d entries\n", arena->name, arena->memstats.clumps);
cis = MKN(ClumpInfo, ClumpChunks);
ok = 0;
for(clump = 0; clump < arena->memstats.clumps; clump += n){
n = ClumpChunks;
if(n > arena->memstats.clumps - clump)
n = arena->memstats.clumps - clump;
if((i=readclumpinfos(arena, clump, cis, n)) != n){
seterr(EOk, "arena directory read failed %d not %d: %r", i, n);
ok = -1;
break;
}
for(i = 0; i < n; i++){
ci = &cis[i];
if(ci->type >= VtMaxType || ci->uncsize >= VtMaxLumpSize) {
fprint(2, "bad clump: %d: type = %d: size = %d\n", clump+i, ci->type, ci->uncsize);
continue;
}
count[ci->uncsize][ci->type]++;
}
}
free(cis);
if(ok < 0)
return TWID32;
return clump;
}
/*
 * Read the clump directories of all arenas in ix and print the
 * total clump count followed by one line per uncompressed size
 * that occurs: size, total, then the count for each type.
 * Prints nothing if any arena directory cannot be read.
 */
static void
clumpstats(Index *ix)
{
	ulong total, n;
	int a, size, type, sum;

	total = 0;
	for(a = 0; a < ix->narenas; a++){
		n = readarenainfo(ix->arenas[a]);
		if(n == TWID32)
			return;
		total += n;
	}

	print("clumps = %ld\n", total);
	for(size = 0; size < VtMaxLumpSize; size++){
		sum = 0;
		for(type = 0; type < VtMaxType; type++)
			sum += count[size][type];
		if(sum != 0){
			print("%d\t%d", size, sum);
			for(type = 0; type < VtMaxType; type++)
				print("\t%d", count[size][type]);
			print("\n");
		}
	}
}
/*
 * Print the command synopsis and exit with a failure status.
 */
void
usage(void)
{
	fprint(2, "usage: clumpstats [-B blockcachesize] config\n");
	/* a non-nil exit string marks failure; 0 would report success */
	threadexitsall("usage");
}
/*
 * clumpstats entry point.
 * -B sets the disk block cache size.  Takes the venti
 * configuration file and runs with readonly set.
 */
void
threadmain(int argc, char *argv[])
{
u32int bcmem;
bcmem = 0;
ARGBEGIN{
case 'B':
bcmem = unittoull(ARGF());
break;
default:
usage();
break;
}ARGEND
readonly = 1;
if(argc != 1)
usage();
if(initventi(argv[0], &conf) < 0)
sysfatal("can't init venti: %r");
/* enforce a minimum cache size derived from the number of arenas and index sections */
if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
initdcache(bcmem);
clumpstats(mainindex);
threadexitsall(0);
}

245
src/cmd/venti/srv/config.c Normal file
View File

@ -0,0 +1,245 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
Index *mainindex;	/* set by initventi from the configuration */
int paranoid = 1; /* should verify hashes on disk read */
/* per-directive helpers used by runconfig; each takes the file named on the config line */
static ArenaPart *configarenas(char *file);
static ISect *configisect(char *file);
static Bloom *configbloom(char *file);
/*
 * Initialize venti from the configuration file: parse it into
 * conf, build the main index from the configured sections, and
 * attach the configured Bloom filter to the index.
 * Returns 0 on success, -1 on failure.
 */
int
initventi(char *file, Config *conf)
{
	Index *ix;

	statsinit();

	if(file == nil){
		seterr(EOk, "no configuration file");
		return -1;
	}
	if(runconfig(file, conf) < 0){
		seterr(EOk, "can't initialize venti: %r");
		return -1;
	}

	ix = initindex(conf->index, conf->sects, conf->nsects);
	mainindex = ix;
	if(ix == nil)
		return -1;
	ix->bloom = conf->bloom;
	return 0;
}
/*
 * Check that s is a well-formed size: a number (any base accepted by
 * strtoull with base 0) optionally followed by exactly one of the
 * unit letters K, M or G in either case.
 * Returns 0 if s is acceptable, -1 otherwise.
 */
static int
numok(char *s)
{
	char *p;

	strtoull(s, &p, 0);
	if(p == s)
		return -1;		/* no digits at all */
	if(*p == 0)
		return 0;		/* plain number */
	if(p[1] == 0 && strchr("MmGgKk", *p))
		return 0;		/* number with a single unit suffix */
	return -1;			/* trailing garbage */
}
/*
 * configs :
 * | configs config
 * config : "isect" filename
 * | "arenas" filename
 * | "bloom" filename
 * | "index" name
 * | "bcmem" num
 * | "mem" num
 * | "icmem" num
 * | "queuewrites"
 * | "httpaddr" address
 * | "webroot" path
 * | "addr" address
 *
 * '#' and \n delimit comments
 */
/*
 * Largest number of fields on a valid configuration line (keyword plus
 * one argument).  runconfig asks getfields for MaxArgs + 1 fields,
 * presumably so that a line with extra fields yields a count that
 * matches no keyword case and is rejected as illegal.
 */
enum
{
MaxArgs = 2
};
/*
 * Read the configuration file named file and fill in *config.
 *
 * config is zeroed first; config->mem starts at 0xFFFFFFFF, which
 * doubles as the "no mem line seen yet" marker.  Each line is split
 * into fields and dispatched on its keyword; isect, arenas and bloom
 * lines open the named partition immediately.
 *
 * Returns 0 once every line has been accepted.  On the first bad line
 * or failed partition it sets the error string (or leaves the one set
 * by the failing helper) and returns -1 with the sects and aparts
 * arrays freed and their counts reset, so the Config never describes
 * entries that no longer exist.
 */
int
runconfig(char *file, Config *config)
{
	ArenaPart **av;
	ISect **sv;
	IFile f;
	char *s, *line, *flds[MaxArgs + 1];
	int i, ok;

	if(readifile(&f, file) < 0)
		return -1;
	memset(config, 0, sizeof *config);
	config->mem = 0xFFFFFFFFUL;
	ok = -1;
	line = nil;
	for(;;){
		s = ifileline(&f);
		if(s == nil){
			ok = 0;
			break;
		}
		/* getfields splits s in place; keep a copy for error messages */
		line = estrdup(s);
		i = getfields(s, flds, MaxArgs + 1, 1, " \t\r");
		if(i == 2 && strcmp(flds[0], "isect") == 0){
			/* grow the section array by one and open the new section */
			sv = MKN(ISect*, config->nsects + 1);
			for(i = 0; i < config->nsects; i++)
				sv[i] = config->sects[i];
			free(config->sects);
			config->sects = sv;
			config->sects[config->nsects] = configisect(flds[1]);
			if(config->sects[config->nsects] == nil)
				break;
			config->nsects++;
		}else if(i == 2 && strcmp(flds[0], "arenas") == 0){
			/* grow the arena partition array by one and open the new partition */
			av = MKN(ArenaPart*, config->naparts + 1);
			for(i = 0; i < config->naparts; i++)
				av[i] = config->aparts[i];
			free(config->aparts);
			config->aparts = av;
			config->aparts[config->naparts] = configarenas(flds[1]);
			if(config->aparts[config->naparts] == nil)
				break;
			config->naparts++;
		}else if(i == 2 && strcmp(flds[0], "bloom") == 0){
			if(config->bloom){
				seterr(EAdmin, "duplicate bloom lines in configuration file %s", file);
				break;
			}
			if((config->bloom = configbloom(flds[1])) == nil)
				break;
		}else if(i == 2 && strcmp(flds[0], "index") == 0){
			if(nameok(flds[1]) < 0){
				seterr(EAdmin, "illegal index name %s in config file %s", flds[1], file);
				break;
			}
			if(config->index != nil){
				seterr(EAdmin, "duplicate indices in config file %s", file);
				break;
			}
			config->index = estrdup(flds[1]);
		}else if(i == 2 && strcmp(flds[0], "bcmem") == 0){
			if(numok(flds[1]) < 0){
				seterr(EAdmin, "illegal size %s in config file %s",
					flds[1], file);
				break;
			}
			if(config->bcmem != 0){
				seterr(EAdmin, "duplicate bcmem lines in config file %s", file);
				break;
			}
			config->bcmem = unittoull(flds[1]);
		}else if(i == 2 && strcmp(flds[0], "mem") == 0){
			if(numok(flds[1]) < 0){
				seterr(EAdmin, "illegal size %s in config file %s",
					flds[1], file);
				break;
			}
			/* 0xFFFFFFFF is the initial value: anything else means a repeat */
			if(config->mem != 0xFFFFFFFFUL){
				seterr(EAdmin, "duplicate mem lines in config file %s", file);
				break;
			}
			config->mem = unittoull(flds[1]);
		}else if(i == 2 && strcmp(flds[0], "icmem") == 0){
			if(numok(flds[1]) < 0){
				seterr(EAdmin, "illegal size %s in config file %s",
					flds[1], file);
				break;
			}
			if(config->icmem != 0){
				seterr(EAdmin, "duplicate icmem lines in config file %s", file);
				break;
			}
			config->icmem = unittoull(flds[1]);
		}else if(i == 1 && strcmp(flds[0], "queuewrites") == 0){
			config->queuewrites = 1;
		}else if(i == 2 && strcmp(flds[0], "httpaddr") == 0){
			if(config->haddr){
				seterr(EAdmin, "duplicate httpaddr lines in configuration file %s", file);
				break;
			}
			config->haddr = estrdup(flds[1]);
		}else if(i == 2 && strcmp(flds[0], "webroot") == 0){
			if(config->webroot){
				seterr(EAdmin, "duplicate webroot lines in configuration file %s", file);
				break;
			}
			config->webroot = estrdup(flds[1]);
		}else if(i == 2 && strcmp(flds[0], "addr") == 0){
			if(config->vaddr){
				seterr(EAdmin, "duplicate addr lines in configuration file %s", file);
				break;
			}
			config->vaddr = estrdup(flds[1]);
		}else{
			seterr(EAdmin, "illegal line '%s' in configuration file %s", line, file);
			break;
		}
		free(line);
		line = nil;
	}
	/* non-nil only when the loop was left through an error break */
	free(line);
	freeifile(&f);
	if(ok < 0){
		free(config->sects);
		config->sects = nil;
		config->nsects = 0;
		free(config->aparts);
		config->aparts = nil;
		config->naparts = 0;
	}
	return ok;
}
/*
 * Open the partition named file and initialize an index section on it.
 * Returns the section, or nil on failure; when initisect fails the
 * error string is prefixed with the file name.
 */
static ISect*
configisect(char *file)
{
	Part *p;
	ISect *sect;

	if(0) fprint(2, "configure index section in %s\n", file);

	p = initpart(file, ORDWR|ODIRECT);
	if(p == nil)
		return nil;
	sect = initisect(p);
	if(sect != nil)
		return sect;
	werrstr("%s: %r", file);
	return nil;
}
/*
 * Open the partition named file and initialize an arena partition on it.
 * Returns the arena partition, or nil on failure; when initarenapart
 * fails the error string is prefixed with the file name.
 */
static ArenaPart*
configarenas(char *file)
{
	Part *p;
	ArenaPart *arenas;

	if(0) fprint(2, "configure arenas in %s\n", file);

	p = initpart(file, ORDWR|ODIRECT);
	if(p == nil)
		return nil;
	arenas = initarenapart(p);
	if(arenas != nil)
		return arenas;
	werrstr("%s: %r", file);
	return nil;
}
/*
 * Open the partition named file and read a bloom filter from it.
 * Returns the filter, or nil on failure; when readbloom fails the
 * error string is prefixed with the file name.
 */
static Bloom*
configbloom(char *file)
{
	Part *p;
	Bloom *filter;

	if(0) fprint(2, "configure bloom in %s\n", file);

	p = initpart(file, ORDWR|ODIRECT);
	if(p == nil)
		return nil;
	filter = readbloom(p);
	if(filter != nil)
		return filter;
	werrstr("%s: %r", file);
	return nil;
}

632
src/cmd/venti/srv/conv.c Normal file
View File

@ -0,0 +1,632 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/*
* disk structure conversion routines
*/
/*
 * Big-endian accessors for byte buffers.
 *
 * The GET macros are expressions.  U32GET widens each byte to u32int
 * before shifting so that a top byte >= 0x80 is never shifted into the
 * sign bit of a promoted int.
 *
 * The multi-byte PUT macros are statements wrapped in do{}while(0), so
 * they stay a single statement under an unbraced if/else.  U64PUT
 * needs a caller-supplied u32int scratch variable, t32.
 */
#define U8GET(p) ((p)[0])
#define U16GET(p) (((p)[0]<<8)|(p)[1])
#define U32GET(p) ((u32int)(((u32int)(p)[0]<<24)|((u32int)(p)[1]<<16)|((u32int)(p)[2]<<8)|(u32int)(p)[3]))
#define U64GET(p) (((u64int)U32GET(p)<<32)|(u64int)U32GET((p)+4))
#define U8PUT(p,v) ((p)[0]=(v)&0xFF)
#define U16PUT(p,v) do{(p)[0]=((v)>>8)&0xFF;(p)[1]=(v)&0xFF;}while(0)
#define U32PUT(p,v) do{(p)[0]=((v)>>24)&0xFF;(p)[1]=((v)>>16)&0xFF;(p)[2]=((v)>>8)&0xFF;(p)[3]=(v)&0xFF;}while(0)
#define U64PUT(p,v,t32) do{t32=(v)>>32;U32PUT(p,t32);t32=(v);U32PUT((p)+4,t32);}while(0)
/*
 * Known on-disk magic numbers and their symbolic names,
 * searched by fmtmagic when formatting error messages.
 */
static struct {
	u32int m;
	char *s;
} magics[] = {
	{ ArenaPartMagic, "ArenaPartMagic" },
	{ ArenaHeadMagic, "ArenaHeadMagic" },
	{ ArenaMagic, "ArenaMagic" },
	{ ISectMagic, "ISectMagic" },
	{ BloomMagic, "BloomMagic" },
};
/*
 * Return a printable form of the magic number m: its symbolic name if
 * it appears in magics[], otherwise its hex value formatted into the
 * caller's buffer s (callers in this file pass a 20-byte buffer).
 */
static char*
fmtmagic(char *s, u32int m)
{
	int k;

	k = 0;
	while(k < nelem(magics)){
		if(m == magics[k].m)
			return magics[k].s;
		k++;
	}
	sprint(s, "0x%08ux", m);
	return s;
}
/*
 * Return the big-endian 32-bit magic number stored at the start of buf.
 */
u32int
unpackmagic(u8int *buf)
{
	u32int magic;

	magic = U32GET(buf);
	return magic;
}
/*
 * Store magic as a big-endian 32-bit value in the first four bytes of buf.
 */
void
packmagic(u32int magic, u8int *buf)
{
	u8int *dst;

	dst = buf;
	U32PUT(dst, magic);
}
/*
 * Decode an arena partition header from buf into ap.
 * Layout: magic, version, blocksize, arenabase (4 bytes each, big-endian).
 * Returns 0 on success; sets the error string and returns -1 if the
 * magic number is not ArenaPartMagic.
 */
int
unpackarenapart(ArenaPart *ap, u8int *buf)
{
	u8int *p;
	u32int m;
	char fbuf[20];

	p = buf;

	m = U32GET(p);
	if(m != ArenaPartMagic){
		/* %lux consumes a ulong, so the enum constant must be cast */
		seterr(ECorrupt, "arena set has wrong magic number: %s expected ArenaPartMagic (%lux)", fmtmagic(fbuf, m), (ulong)ArenaPartMagic);
		return -1;
	}
	p += U32Size;
	ap->version = U32GET(p);
	p += U32Size;
	ap->blocksize = U32GET(p);
	p += U32Size;
	ap->arenabase = U32GET(p);
	p += U32Size;

	/* sanity check that the code above agrees with ArenaPartSize */
	if(buf + ArenaPartSize != p)
		sysfatal("unpackarenapart unpacked wrong amount");

	return 0;
}
/*
 * Encode the arena partition header ap into buf:
 * magic, version, blocksize, arenabase (4 bytes each, big-endian).
 * Always returns 0; aborts if the bytes written disagree with ArenaPartSize.
 */
int
packarenapart(ArenaPart *ap, u8int *buf)
{
	int off;

	off = 0;
	U32PUT(buf+off, ArenaPartMagic);
	off += U32Size;
	U32PUT(buf+off, ap->version);
	off += U32Size;
	U32PUT(buf+off, ap->blocksize);
	off += U32Size;
	U32PUT(buf+off, ap->arenabase);
	off += U32Size;

	if(off != ArenaPartSize)
		sysfatal("packarenapart packed wrong amount");

	return 0;
}
/*
 * Decode an arena trailer from buf into arena.
 *
 * Layout: magic, version, name[ANameSize], clumps, cclumps, ctime,
 * wtime, (version 5 only) clumpmagic, used, uncsize, sealed.
 * Version 4 trailers carry no clump magic, so _ClumpMagic is assumed.
 * The decoded disk statistics are also copied to arena->memstats.
 *
 * Returns 0 on success; sets the error string and returns -1 on a bad
 * magic number or an unknown version.
 */
int
unpackarena(Arena *arena, u8int *buf)
{
	int sz;
	u8int *p;
	u32int m;
	char fbuf[20];

	p = buf;

	m = U32GET(p);
	if(m != ArenaMagic){
		/* %lux consumes a ulong, so the enum constant must be cast */
		seterr(ECorrupt, "arena has wrong magic number: %s expected ArenaMagic (%lux)", fmtmagic(fbuf, m), (ulong)ArenaMagic);
		return -1;
	}
	p += U32Size;
	arena->version = U32GET(p);
	p += U32Size;
	namecp(arena->name, (char*)p);
	p += ANameSize;
	arena->diskstats.clumps = U32GET(p);
	p += U32Size;
	arena->diskstats.cclumps = U32GET(p);
	p += U32Size;
	arena->ctime = U32GET(p);
	p += U32Size;
	arena->wtime = U32GET(p);
	p += U32Size;
	if(arena->version == ArenaVersion5){
		arena->clumpmagic = U32GET(p);
		p += U32Size;
	}
	arena->diskstats.used = U64GET(p);
	p += U64Size;
	arena->diskstats.uncsize = U64GET(p);
	p += U64Size;
	arena->diskstats.sealed = U8GET(p);
	p += U8Size;

	arena->memstats = arena->diskstats;

	switch(arena->version){
	case ArenaVersion4:
		sz = ArenaSize4;
		arena->clumpmagic = _ClumpMagic;
		break;
	case ArenaVersion5:
		sz = ArenaSize5;
		break;
	default:
		seterr(ECorrupt, "arena has bad version number %d", arena->version);
		return -1;
	}
	/* sanity check that the code above agrees with the size constants */
	if(buf + sz != p)
		sysfatal("unpackarena unpacked wrong amount");

	return 0;
}
/*
 * Encode the arena trailer for arena into buf, using the layout that
 * matches arena->version (the clump magic is only stored in version 5).
 * Writing a version 4 trailer for an arena whose clump magic is not
 * _ClumpMagic prints a warning, since that value cannot be recorded.
 * Returns 0; an unknown version is fatal.
 */
int
packarena(Arena *arena, u8int *buf)
{
	int want, off;
	u32int t32;

	if(arena->version == ArenaVersion4){
		want = ArenaSize4;
		if(arena->clumpmagic != _ClumpMagic)
			fprint(2, "warning: writing old arena tail loses clump magic 0x%lux != 0x%lux\n",
				(ulong)arena->clumpmagic, (ulong)_ClumpMagic);
	}else if(arena->version == ArenaVersion5){
		want = ArenaSize5;
	}else{
		sysfatal("packarena unknown version %d", arena->version);
		return -1;
	}

	off = 0;
	U32PUT(buf+off, ArenaMagic);
	off += U32Size;
	U32PUT(buf+off, arena->version);
	off += U32Size;
	namecp((char*)(buf+off), arena->name);
	off += ANameSize;
	U32PUT(buf+off, arena->diskstats.clumps);
	off += U32Size;
	U32PUT(buf+off, arena->diskstats.cclumps);
	off += U32Size;
	U32PUT(buf+off, arena->ctime);
	off += U32Size;
	U32PUT(buf+off, arena->wtime);
	off += U32Size;
	if(arena->version == ArenaVersion5){
		U32PUT(buf+off, arena->clumpmagic);
		off += U32Size;
	}
	U64PUT(buf+off, arena->diskstats.used, t32);
	off += U64Size;
	U64PUT(buf+off, arena->diskstats.uncsize, t32);
	off += U64Size;
	U8PUT(buf+off, arena->diskstats.sealed);
	off += U8Size;

	if(off != want)
		sysfatal("packarena packed wrong amount");

	return 0;
}
/*
 * Decode an arena header from buf into head.
 *
 * Layout: magic, version, name[ANameSize], blocksize, size,
 * (version 5 only) clumpmagic.  Version 4 headers carry no clump
 * magic, so _ClumpMagic is assumed.
 *
 * Returns 0 on success; sets the error string and returns -1 on a bad
 * magic number or an unknown version.
 */
int
unpackarenahead(ArenaHead *head, u8int *buf)
{
	u8int *p;
	u32int m;
	int sz;
	char fbuf[20];

	p = buf;

	m = U32GET(p);
	/* reject anything that is not an arena header, as the other unpackers do */
	if(m != ArenaHeadMagic){
		seterr(ECorrupt, "arena head has wrong magic number: %s expected ArenaHeadMagic (%lux)", fmtmagic(fbuf, m), (ulong)ArenaHeadMagic);
		return -1;
	}
	p += U32Size;
	head->version = U32GET(p);
	p += U32Size;
	namecp(head->name, (char*)p);
	p += ANameSize;
	head->blocksize = U32GET(p);
	p += U32Size;
	head->size = U64GET(p);
	p += U64Size;
	if(head->version == ArenaVersion5){
		head->clumpmagic = U32GET(p);
		p += U32Size;
	}

	switch(head->version){
	case ArenaVersion4:
		sz = ArenaHeadSize4;
		head->clumpmagic = _ClumpMagic;
		break;
	case ArenaVersion5:
		sz = ArenaHeadSize5;
		break;
	default:
		seterr(ECorrupt, "arena head has unexpected version %d", head->version);
		return -1;
	}

	/* sanity check that the code above agrees with the size constants */
	if(buf + sz != p)
		sysfatal("unpackarenahead unpacked wrong amount");

	return 0;
}
/*
 * Encode the arena header head into buf, using the layout that matches
 * head->version (the clump magic is only stored in version 5).
 * Writing a version 4 header whose clump magic is not _ClumpMagic
 * prints a warning, since that value cannot be recorded.
 * Returns 0; an unknown version is fatal.
 */
int
packarenahead(ArenaHead *head, u8int *buf)
{
	int want, off;
	u32int t32;

	if(head->version == ArenaVersion4){
		want = ArenaHeadSize4;
		if(head->clumpmagic != _ClumpMagic)
			fprint(2, "warning: writing old arena header loses clump magic 0x%lux != 0x%lux\n",
				(ulong)head->clumpmagic, (ulong)_ClumpMagic);
	}else if(head->version == ArenaVersion5){
		want = ArenaHeadSize5;
	}else{
		sysfatal("packarenahead unknown version %d", head->version);
		return -1;
	}

	off = 0;
	U32PUT(buf+off, ArenaHeadMagic);
	off += U32Size;
	U32PUT(buf+off, head->version);
	off += U32Size;
	namecp((char*)(buf+off), head->name);
	off += ANameSize;
	U32PUT(buf+off, head->blocksize);
	off += U32Size;
	U64PUT(buf+off, head->size, t32);
	off += U64Size;
	if(head->version == ArenaVersion5){
		U32PUT(buf+off, head->clumpmagic);
		off += U32Size;
	}

	if(off != want)
		sysfatal("packarenahead packed wrong amount");

	return 0;
}
/*
 * Validate the encoding and sizes recorded in a clump header:
 * an uncompressed clump must have size == uncsize, a compressed clump
 * must have size < uncsize, and no other encoding is accepted.
 * Returns 0 if consistent; otherwise sets the error string and returns -1.
 */
static int
checkclump(Clump *w)
{
	if(w->encoding != ClumpENone && w->encoding != ClumpECompress){
		seterr(ECorrupt, "clump has illegal encoding");
		return -1;
	}
	if(w->encoding == ClumpENone && w->info.size != w->info.uncsize){
		seterr(ECorrupt, "uncompressed wad size mismatch");
		return -1;
	}
	if(w->encoding == ClumpECompress && w->info.size >= w->info.uncsize){
		seterr(ECorrupt, "compressed lump has inconsistent block sizes %d %d", w->info.size, w->info.uncsize);
		return -1;
	}
	return 0;
}
/*
 * Decode a clump header from buf into c.
 * Layout: magic, type, size, uncsize, score, encoding, creator, time.
 * The stored magic must equal cmagic.
 * Returns -1 with the error string set on a magic mismatch; otherwise
 * returns the result of checkclump on the decoded header.
 */
int
unpackclump(Clump *c, u8int *buf, u32int cmagic)
{
	int off;
	u32int got;

	got = U32GET(buf);
	if(got != cmagic){
		seterr(ECorrupt, "clump has bad magic number=%#8.8ux != %#8.8ux", got, cmagic);
		return -1;
	}
	off = U32Size;

	c->info.type = vtfromdisktype(U8GET(buf+off));
	off += U8Size;
	c->info.size = U16GET(buf+off);
	off += U16Size;
	c->info.uncsize = U16GET(buf+off);
	off += U16Size;
	scorecp(c->info.score, buf+off);
	off += VtScoreSize;

	c->encoding = U8GET(buf+off);
	off += U8Size;
	c->creator = U32GET(buf+off);
	off += U32Size;
	c->time = U32GET(buf+off);
	off += U32Size;

	if(off != ClumpSize)
		sysfatal("unpackclump unpacked wrong amount");

	return checkclump(c);
}
/*
 * Encode the clump header c into buf, preceded by magic.
 * Layout: magic, type, size, uncsize, score, encoding, creator, time.
 * The header is written unconditionally; the return value is the
 * result of checkclump on c.
 */
int
packclump(Clump *c, u8int *buf, u32int magic)
{
	int off;

	off = 0;
	U32PUT(buf+off, magic);
	off += U32Size;
	U8PUT(buf+off, vttodisktype(c->info.type));
	off += U8Size;
	U16PUT(buf+off, c->info.size);
	off += U16Size;
	U16PUT(buf+off, c->info.uncsize);
	off += U16Size;
	scorecp(buf+off, c->info.score);
	off += VtScoreSize;

	U8PUT(buf+off, c->encoding);
	off += U8Size;
	U32PUT(buf+off, c->creator);
	off += U32Size;
	U32PUT(buf+off, c->time);
	off += U32Size;

	if(off != ClumpSize)
		sysfatal("packclump packed wrong amount");

	return checkclump(c);
}
/*
 * Decode a clump directory entry from buf into ci.
 * Layout: type, size, uncsize, score.
 */
void
unpackclumpinfo(ClumpInfo *ci, u8int *buf)
{
	int off;

	off = 0;
	ci->type = vtfromdisktype(U8GET(buf+off));
	off += U8Size;
	ci->size = U16GET(buf+off);
	off += U16Size;
	ci->uncsize = U16GET(buf+off);
	off += U16Size;
	scorecp(ci->score, buf+off);
	off += VtScoreSize;

	if(off != ClumpInfoSize)
		sysfatal("unpackclumpinfo unpacked wrong amount");
}
/*
 * Encode the clump directory entry ci into buf.
 * Layout: type, size, uncsize, score.
 */
void
packclumpinfo(ClumpInfo *ci, u8int *buf)
{
	int off;

	off = 0;
	U8PUT(buf+off, vttodisktype(ci->type));
	off += U8Size;
	U16PUT(buf+off, ci->size);
	off += U16Size;
	U16PUT(buf+off, ci->uncsize);
	off += U16Size;
	scorecp(buf+off, ci->score);
	off += VtScoreSize;

	if(off != ClumpInfoSize)
		sysfatal("packclumpinfo packed wrong amount");
}
/*
 * Decode an index section header from buf into is.
 *
 * Layout: magic, version, name[ANameSize], index[ANameSize],
 * blocksize, blockbase, blocks, start, stop, and for ISectVersion2
 * an additional bucketmagic.  bucketmagic is set to 0 for other versions.
 *
 * Returns 0 on success; sets the error string and returns -1 if the
 * magic number is not ISectMagic.
 */
int
unpackisect(ISect *is, u8int *buf)
{
	u8int *p;
	u32int m;
	char fbuf[20];

	p = buf;

	m = U32GET(p);
	if(m != ISectMagic){
		/* %lux consumes a ulong, so the enum constant must be cast */
		seterr(ECorrupt, "index section has wrong magic number: %s expected ISectMagic (%lux)",
			fmtmagic(fbuf, m), (ulong)ISectMagic);
		return -1;
	}
	p += U32Size;
	is->version = U32GET(p);
	p += U32Size;
	namecp(is->name, (char*)p);
	p += ANameSize;
	namecp(is->index, (char*)p);
	p += ANameSize;
	is->blocksize = U32GET(p);
	p += U32Size;
	is->blockbase = U32GET(p);
	p += U32Size;
	is->blocks = U32GET(p);
	p += U32Size;
	is->start = U32GET(p);
	p += U32Size;
	is->stop = U32GET(p);
	p += U32Size;
	/* sanity check that the code above agrees with ISectSize1 */
	if(buf + ISectSize1 != p)
		sysfatal("unpackisect unpacked wrong amount");
	is->bucketmagic = 0;
	if(is->version == ISectVersion2){
		is->bucketmagic = U32GET(p);
		p += U32Size;
		if(buf + ISectSize2 != p)
			sysfatal("unpackisect unpacked wrong amount");
	}

	return 0;
}
/*
 * Encode the index section header is into buf.
 * Layout: magic, version, name, index, blocksize, blockbase, blocks,
 * start, stop, and for ISectVersion2 an additional bucketmagic.
 * Always returns 0.
 */
int
packisect(ISect *is, u8int *buf)
{
	int off;

	off = 0;
	U32PUT(buf+off, ISectMagic);
	off += U32Size;
	U32PUT(buf+off, is->version);
	off += U32Size;
	namecp((char*)(buf+off), is->name);
	off += ANameSize;
	namecp((char*)(buf+off), is->index);
	off += ANameSize;
	U32PUT(buf+off, is->blocksize);
	off += U32Size;
	U32PUT(buf+off, is->blockbase);
	off += U32Size;
	U32PUT(buf+off, is->blocks);
	off += U32Size;
	U32PUT(buf+off, is->start);
	off += U32Size;
	U32PUT(buf+off, is->stop);
	off += U32Size;
	if(off != ISectSize1)
		sysfatal("packisect packed wrong amount");

	if(is->version == ISectVersion2){
		U32PUT(buf+off, is->bucketmagic);
		off += U32Size;
		if(off != ISectSize2)
			sysfatal("packisect packed wrong amount");
	}

	return 0;
}
/*
 * Decode an index entry from buf into ie.
 * Layout: score, wtime, train, addr, size, type, blocks.
 * The type byte must sit at offset IEntryTypeOff.
 * An address with any of its top 8 bits set is reported with print
 * (NOTE(review): looks like a leftover diagnostic -- confirm).
 */
void
unpackientry(IEntry *ie, u8int *buf)
{
	int off;

	off = 0;
	scorecp(ie->score, buf+off);
	off += VtScoreSize;
	ie->wtime = U32GET(buf+off);
	off += U32Size;
	ie->train = U16GET(buf+off);
	off += U16Size;
	ie->ia.addr = U64GET(buf+off);
	if(ie->ia.addr>>56) print("%.8H => %llux\n", buf+off, ie->ia.addr);
	off += U64Size;
	ie->ia.size = U16GET(buf+off);
	off += U16Size;
	if(off != IEntryTypeOff)
		sysfatal("unpackientry bad IEntryTypeOff amount");
	ie->ia.type = vtfromdisktype(U8GET(buf+off));
	off += U8Size;
	ie->ia.blocks = U8GET(buf+off);
	off += U8Size;

	if(off != IEntrySize)
		sysfatal("unpackientry unpacked wrong amount");
}
/*
 * Encode the index entry ie into buf.
 * Layout: score, wtime, train, addr, size, type, blocks.
 */
void
packientry(IEntry *ie, u8int *buf)
{
	u32int t32;
	int off;

	off = 0;
	scorecp(buf+off, ie->score);
	off += VtScoreSize;
	U32PUT(buf+off, ie->wtime);
	off += U32Size;
	U16PUT(buf+off, ie->train);
	off += U16Size;
	U64PUT(buf+off, ie->ia.addr, t32);
	off += U64Size;
	U16PUT(buf+off, ie->ia.size);
	off += U16Size;
	U8PUT(buf+off, vttodisktype(ie->ia.type));
	off += U8Size;
	U8PUT(buf+off, ie->ia.blocks);
	off += U8Size;

	if(off != IEntrySize)
		sysfatal("packientry packed wrong amount");
}
/*
 * Decode an index bucket header from buf into b: a 16-bit entry count
 * followed by a 32-bit magic; b->data points just past the header.
 * When magic is nonzero and differs from the stored magic, the bucket
 * is treated as empty (n = 0).
 */
void
unpackibucket(IBucket *b, u8int *buf, u32int magic)
{
	u32int found;

	b->data = buf + IBucketSize;
	b->n = U16GET(buf);
	if(magic == 0)
		return;
	found = U32GET(buf+U16Size);
	if(found != magic)
		b->n = 0;
}
/*
 * Encode an index bucket header into buf: the 16-bit entry count b->n
 * followed by the 32-bit magic.
 */
void
packibucket(IBucket *b, u8int *buf, u32int magic)
{
	u8int *countp, *magicp;

	countp = buf;
	magicp = buf + U16Size;
	U16PUT(countp, b->n);
	U32PUT(magicp, magic);
}
/*
 * Encode the bloom filter header for b into buf:
 * magic, version, nhash, size (4 bytes each, big-endian).
 */
void
packbloomhead(Bloom *b, u8int *buf)
{
	u8int *magicp, *versp, *nhashp, *sizep;

	magicp = buf;
	versp = buf + 4;
	nhashp = buf + 8;
	sizep = buf + 12;

	U32PUT(magicp, BloomMagic);
	U32PUT(versp, BloomVersion);
	U32PUT(nhashp, b->nhash);
	U32PUT(sizep, b->size);
}
/*
 * Decode a bloom filter header from buf into b.
 * Layout: magic, version, nhash, size (4 bytes each, big-endian).
 * Returns 0 on success; sets the error string and returns -1 if the
 * magic number or the version is not the expected one.
 */
int
unpackbloomhead(Bloom *b, u8int *buf)
{
	u8int *p;
	u32int m;
	char fbuf[20];

	p = buf;

	m = U32GET(p);
	if(m != BloomMagic){
		seterr(ECorrupt, "bloom filter has wrong magic number: %s expected BloomMagic (%lux)", fmtmagic(fbuf, m), (ulong)BloomMagic);
		return -1;
	}
	p += U32Size;

	m = U32GET(p);
	if(m != BloomVersion){
		seterr(ECorrupt, "bloom filter has wrong version %ud expected %ud", (uint)m, (uint)BloomVersion);
		return -1;
	}
	p += U32Size;

	b->nhash = U32GET(p);
	p += U32Size;

	b->size = U32GET(p);
	p += U32Size;

	/* sanity check that the code above agrees with BloomHeadSize */
	if(buf + BloomHeadSize != p)
		sysfatal("unpackbloomhead unpacked wrong amount");

	return 0;
}

718
src/cmd/venti/srv/dat.h Normal file
View File

@ -0,0 +1,718 @@
/*
 * Forward typedefs for the structures of the venti server, so that
 * they can refer to one another by bare name.
 * NOTE(review): ArenaTail is named here but no struct ArenaTail is
 * defined in this header -- confirm whether it is still needed.
 */
typedef struct Config Config;
typedef struct AMap AMap;
typedef struct AMapN AMapN;
typedef struct Arena Arena;
typedef struct AState AState;
typedef struct ArenaHead ArenaHead;
typedef struct ArenaPart ArenaPart;
typedef struct ArenaTail ArenaTail;
typedef struct ATailStats ATailStats;
typedef struct CIBlock CIBlock;
typedef struct Clump Clump;
typedef struct ClumpInfo ClumpInfo;
typedef struct Graph Graph;
typedef struct IAddr IAddr;
typedef struct IBucket IBucket;
typedef struct IEStream IEStream;
typedef struct IEntry IEntry;
typedef struct IFile IFile;
typedef struct ISect ISect;
typedef struct Index Index;
typedef struct Lump Lump;
typedef struct DBlock DBlock;
typedef struct Part Part;
typedef struct Statbin Statbin;
typedef struct Statdesc Statdesc;
typedef struct Stats Stats;
typedef struct ZBlock ZBlock;
typedef struct Round Round;
typedef struct Bloom Bloom;
/*
 * All-ones values of each width.
 * TWID32 is returned as a failure value by readarenainfo.
 */
#define TWID32 ((u32int)~(u32int)0)
#define TWID64 ((u64int)~(u64int)0)
#define TWID8 ((u8int)~(u8int)0)
/*
 * Global constants: size limits, syncarena return codes, error
 * severities, on-disk magic numbers and versions, clump encodings,
 * on-disk structure sizes, and dirty-block ordering flags.
 */
enum
{
ABlockLog = 9, /* log2(512), the quantum for reading arenas */
ANameSize = 64, /* bytes in a name field, e.g. char name[ANameSize] */
MaxDiskBlock = 64*1024, /* max. allowed size for a disk block */
MaxIoSize = 64*1024, /* max. allowed size for a disk io operation */
PartBlank = 256*1024, /* untouched section at beginning of partition */
HeadSize = 512, /* size of a header after PartBlank */
MinArenaSize = 1*1024*1024, /* smallest reasonable arena size */
IndexBase = 1024*1024, /* initial address to use in an index */
MaxIo = 64*1024, /* max size of a single read or write operation */
ICacheBits = 16, /* default bits for indexing icache */
ICacheDepth = 4, /* default depth of an icache hash chain */
MaxAMap = 2*1024, /* max. allowed arenas in an address mapping; must be < 32*1024 */
/*
 * return codes from syncarena
 */
SyncDataErr = 1 << 0, /* problem reading the clump data */
SyncCIErr = 1 << 1, /* found erroneous clump directory entries */
SyncCIZero = 1 << 2, /* found unwritten clump directory entries */
SyncFixErr = 1 << 3, /* error writing fixed data */
SyncHeader = 1 << 4, /* altered header fields */
/*
 * error severity
 */
EOk = 0, /* error expected in normal operation */
EStrange, /* strange error that should be logged */
ECorrupt, /* corrupted data found in arenas */
EICorrupt, /* corrupted data found in index */
EAdmin, /* should be brought to administrators' attention */
ECrash, /* really bad internal error */
EBug, /* a limitation which should be fixed */
EInconsist, /* inconsistencies between index and arena */
EMax,
/*
 * internal disk formats for the venti archival storage system
 */
/*
 * magic numbers on disk
 */
_ClumpMagic = 0xd15cb10c, /* clump header, deprecated */
ClumpFreeMagic = 0, /* free clump; terminates active clump log */
ArenaPartMagic = 0xa9e4a5e7, /* arena partition header */
ArenaMagic = 0xf2a14ead, /* arena trailer */
ArenaHeadMagic = 0xd15c4ead, /* arena header */
BloomMagic = 0xb1004ead, /* bloom filter header */
BloomMaxHash = 32, /* NOTE(review): same value as MaxBloomHash below -- confirm both are needed */
ISectMagic = 0xd15c5ec7, /* index header */
ArenaPartVersion = 3,
ArenaVersion4 = 4, /* arena format without a stored clump magic */
ArenaVersion5 = 5, /* arena format with a stored clump magic */
BloomVersion = 1,
IndexVersion = 1,
ISectVersion1 = 1, /* index section format without a bucket magic */
ISectVersion2 = 2, /* index section format with a bucket magic */
/*
 * encodings of clumps on disk
 */
ClumpEErr = 0, /* can't happen */
ClumpENone, /* plain */
ClumpECompress, /* compressed */
ClumpEMax,
/*
 * sizes in bytes on disk
 */
U8Size = 1,
U16Size = 2,
U32Size = 4,
U64Size = 8,
ArenaPartSize = 4 * U32Size,
ArenaSize4 = 2 * U64Size + 6 * U32Size + ANameSize + U8Size,
ArenaSize5 = ArenaSize4 + U32Size,
ArenaHeadSize4 = U64Size + 3 * U32Size + ANameSize,
ArenaHeadSize5 = ArenaHeadSize4 + U32Size,
BloomHeadSize = 4 * U32Size,
ISectSize1 = 7 * U32Size + 2 * ANameSize,
ISectSize2 = ISectSize1 + U32Size,
ClumpInfoSize = U8Size + 2 * U16Size + VtScoreSize,
ClumpSize = ClumpInfoSize + U8Size + 3 * U32Size,
MaxBloomSize = 1<<(32-3), /* 2^32 bits */
MaxBloomHash = 32, /* bits per score */
/*
 * BUG - The various block copies that manipulate entry buckets
 * would be faster if we bumped IBucketSize up to 8 and IEntrySize up to 40,
 * so that everything is word-aligned. Buildindex is actually cpu-bound
 * by the (byte at a time) copying in qsort.
 */
IBucketSize = U32Size + U16Size,
IEntrySize = U64Size + U32Size + 2*U16Size + 2*U8Size + VtScoreSize,
IEntryTypeOff = VtScoreSize + U64Size + U32Size + 2 * U16Size, /* offset of the type byte within a packed IEntry */
MaxClumpBlocks = (VtMaxLumpSize + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog, /* ABlockLog-sized blocks needed for the largest clump, rounded up */
/*
 * dirty flags - order controls disk write order
 */
DirtyArena = 1,
DirtyArenaCib,
DirtyArenaTrailer,
DirtyMax,
VentiZZZZZZZZ
};
/*
 * Trace category strings, defined elsewhere.
 * NOTE(review): presumably passed to a tracing routine to label
 * events -- confirm against the definitions.
 */
extern char TraceDisk[];
extern char TraceLump[];
extern char TraceBlock[];
extern char TraceProc[];
extern char TraceWork[];
extern char TraceQuiet[];
extern char TraceRpc[];
/*
 * results of parsing and initializing a config file
 * (filled in by runconfig)
 */
struct Config
{
char *index; /* name of the index to initialize */
int naparts; /* arena partitions initialized */
ArenaPart **aparts;
int nsects; /* index sections initialized */
ISect **sects;
Bloom *bloom; /* bloom filter */
u32int bcmem; /* value of the "bcmem" line; 0 if none */
u32int mem; /* value of the "mem" line; 0xFFFFFFFF if none */
u32int icmem; /* value of the "icmem" line; 0 if none */
int queuewrites; /* set to 1 by a "queuewrites" line */
char* haddr; /* argument of the "httpaddr" line */
char* vaddr; /* argument of the "addr" line */
char* webroot; /* argument of the "webroot" line */
};
/*
 * a Part is the low level interface to files or disks.
 * there are two main types of partitions
 * arena partitions, which hold some number of arenas, each in a sub-partition.
 * index partitions, which only have one subpartition.
 */
struct Part
{
int fd; /* rock for accessing the disk */
int mode;
u64int offset;
u64int size; /* size of the partition */
u32int blocksize; /* block size for reads and writes */
u32int fsblocksize; /* minimum file system block size */
char *name;
char *filename;
Channel *writechan; /* chan[dcache.nblock](DBlock*) */
};
/*
 * a cached block from the partition
 * yuck -- most of this is internal structure for the cache
 * all other routines should only use data
 */
struct DBlock
{
u8int *data;
Part *part; /* partition in which cached */
u64int addr; /* base address on the partition */
u32int size; /* amount of data available, not amount allocated; should go away */
u32int mode;
u32int dirty;
u32int dirtying;
DBlock *next; /* doubly linked hash chains */
DBlock *prev;
u32int heap; /* index in heap table */
u32int used; /* last reference times */
u32int used2;
u32int ref; /* reference count */
RWLock lock; /* for access to data only */
Channel *writedonechan;
void* chanbuf[1]; /* buffer for the chan! */
};
/*
 * a cached lump (packet of data identified by score and type)
 * yuck -- most of this is internal structure for the cache
 * all other routines should only use data
 * double yuck -- this is mostly the same as a DBlock
 */
struct Lump
{
Packet *data;
Part *part; /* partition in which cached */
u8int score[VtScoreSize]; /* score of packet */
u8int type; /* type of packet */
u32int size; /* amount of data allocated to hold packet */
Lump *next; /* doubly linked hash chains */
Lump *prev;
u32int heap; /* index in heap table */
u32int used; /* last reference times */
u32int used2;
u32int ref; /* reference count */
QLock lock; /* for access to data only */
};
/*
 * mapping between names and address ranges
 * (used by Index for both index sections and arenas)
 */
struct AMap
{
u64int start;
u64int stop;
char name[ANameSize];
};
/*
 * an AMap array along with its length
 */
struct AMapN
{
int n; /* number of entries in map */
AMap *map;
};
/*
 * an ArenaPart is a partition made up of Arenas
 * it exists because most os's don't support many partitions,
 * and we want to have many different Arenas
 */
struct ArenaPart
{
Part *part;
u64int size; /* size of underlying partition, rounded down to blocks */
Arena **arenas;
u32int tabbase; /* base address of arena table on disk */
u32int tabsize; /* max. bytes in arena table */
/*
 * fields stored on disk
 * (encoded by packarenapart, decoded by unpackarenapart)
 */
u32int version;
u32int blocksize; /* "optimal" block size for reads and writes */
u32int arenabase; /* base address of first arena */
/*
 * stored in the arena mapping table on disk
 */
AMap *map;
int narenas;
};
/*
 * info about one block in the clump info cache
 */
struct CIBlock
{
u32int block; /* blocks in the directory */
int offset; /* offsets of one clump in the data */
DBlock *data; /* cached disk block (see DBlock) */
};
/*
 * Statistics kept in the tail.
 * An Arena holds two copies, memstats and diskstats; packarena and
 * unpackarena transfer the diskstats copy.
 */
struct ATailStats
{
u32int clumps; /* number of clumps */
u32int cclumps; /* number of compressed clumps */
u64int used;
u64int uncsize;
u8int sealed;
};
/*
 * Arena state - represents a point in the data log
 */
struct AState
{
Arena *arena;
u64int aa; /* index address */
ATailStats stats; /* tail statistics at this point */
};
/*
 * an Arena is a log of Clumps, preceded by an ArenaHead,
 * and followed by an Arena trailer, each in one disk block.
 * struct on disk is not always up to date, but should be self-consistent.
 * to sync after reboot, follow clumps starting at used until ClumpFreeMagic is found.
 * <struct name="Arena" type="Arena *">
 * <field name="name" val="s->name" type="AName"/>
 * <field name="version" val="s->version" type="U32int"/>
 * <field name="partition" val="s->part->name" type="AName"/>
 * <field name="blocksize" val="s->blocksize" type="U32int"/>
 * <field name="start" val="s->base" type="U64int"/>
 * <field name="stop" val="s->base+2*s->blocksize" type="U64int"/>
 * <field name="created" val="s->ctime" type="U32int"/>
 * <field name="modified" val="s->wtime" type="U32int"/>
 * <field name="sealed" val="s->sealed" type="Sealed"/>
 * <field name="score" val="s->score" type="Score"/>
 * <field name="clumps" val="s->clumps" type="U32int"/>
 * <field name="compressedclumps" val="s->cclumps" type="U32int"/>
 * <field name="data" val="s->uncsize" type="U64int"/>
 * <field name="compresseddata" val="s->used - s->clumps * ClumpSize" type="U64int"/>
 * <field name="storage" val="s->used + s->clumps * ClumpInfoSize" type="U64int"/>
 * </struct>
 */
struct Arena
{
QLock lock; /* lock for arena fields, writing to disk */
Part *part; /* partition in which arena lives */
int blocksize; /* size of block to read or write */
u64int base; /* base address on disk */
u64int size; /* total space in the arena */
u64int limit; /* storage limit for clumps */
u8int score[VtScoreSize]; /* score of the entire sealed & summed arena */
int clumpmax; /* ClumpInfos per block */
AState mem;
int inqueue;
DigestState sha1;
/*
 * fields stored on disk
 * (packarena writes diskstats; unpackarena reads diskstats and
 * copies it to memstats)
 */
u32int version;
char name[ANameSize]; /* text label */
ATailStats memstats;
ATailStats diskstats;
u32int ctime; /* first time a block was written */
u32int wtime; /* last time a block was written */
u32int clumpmagic; /* stored on disk only for ArenaVersion5; _ClumpMagic for ArenaVersion4 */
};
/*
 * redundant storage of some fields at the beginning of each arena
 * (encoded by packarenahead, decoded by unpackarenahead)
 */
struct ArenaHead
{
u32int version;
char name[ANameSize];
u32int blocksize;
u64int size;
u32int clumpmagic; /* stored on disk only for ArenaVersion5; _ClumpMagic for ArenaVersion4 */
};
/*
 * most interesting meta information for a clump.
 * stored in each clump's header and in the Arena's directory,
 * stored in reverse order just prior to the arena trailer
 */
struct ClumpInfo
{
u8int type; /* in-memory type; converted with vttodisktype/vtfromdisktype when packed */
u16int size; /* size of disk data, not including header */
u16int uncsize; /* size of uncompressed data */
u8int score[VtScoreSize]; /* score of the uncompressed data only */
};
/*
 * header for an immutable clump of data
 */
struct Clump
{
ClumpInfo info;
u8int encoding; /* ClumpENone or ClumpECompress; checkclump rejects anything else */
u32int creator; /* initial client which wrote the block */
u32int time; /* creation at gmt seconds since 1/1/1970 */
};
/*
 * index of all clumps according to their score
 * this is just a wrapper to tie together the index sections
 * <struct name="Index" type="Index *">
 * <field name="name" val="s->name" type="AName"/>
 * <field name="version" val="s->version" type="U32int"/>
 * <field name="blocksize" val="s->blocksize" type="U32int"/>
 * <field name="tabsize" val="s->tabsize" type="U32int"/>
 * <field name="buckets" val="s->buckets" type="U32int"/>
 * <field name="buckdiv" val="s->div" type="U32int"/>
 * <field name="bitblocks" val="s->div" type="U32int"/>
 * <field name="maxdepth" val="s->div" type="U32int"/>
 * <field name="bitkeylog" val="s->div" type="U32int"/>
 * <field name="bitkeymask" val="s->div" type="U32int"/>
 * <array name="sect" val="&s->smap[i]" elems="s->nsects" type="Amap"/>
 * <array name="amap" val="&s->amap[i]" elems="s->narenas" type="Amap"/>
 * <array name="arena" val="s->arenas[i]" elems="s->narenas" type="Arena"/>
 * </struct>
 * <struct name="Amap" type="AMap *">
 * <field name="name" val="s->name" type="AName"/>
 * <field name="start" val="s->start" type="U64int"/>
 * <field name="stop" val="s->stop" type="U64int"/>
 * </struct>
 */
struct Index
{
u32int div; /* divisor for mapping score to bucket */
u32int buckets; /* last bucket used in disk hash table */
u32int blocksize;
u32int tabsize; /* max. bytes in index config */
u32int bitblocks; //XXX remove these fields
u32int maxdepth;
u32int bitkeylog;
u32int bitkeymask;
int mapalloc; /* first arena to check when adding a lump */
Arena **arenas; /* arenas in the mapping */
ISect **sects; /* sections which hold the buckets */
Bloom *bloom; /* bloom filter */
/*
 * fields stored in config file
 */
u32int version;
char name[ANameSize]; /* text label */
int nsects;
AMap *smap; /* mapping of buckets to index sections */
int narenas;
AMap *amap; /* mapping from index addresses to arenas */
};
/*
 * one part of the bucket storage for an index.
 * the index blocks are sequentially allocated
 * across all of the sections.
 */
struct ISect
{
Part *part;
int blocklog; /* log2(blocksize) */
int buckmax; /* max. entries in an index bucket */
u32int tabbase; /* base address of index config table on disk */
u32int tabsize; /* max. bytes in index config */
Channel *writechan;
Channel *writedonechan;
/*
 * fields stored on disk
 * (encoded by packisect, decoded by unpackisect)
 */
u32int version;
u32int bucketmagic; /* stored on disk only for ISectVersion2; unpackisect sets 0 otherwise */
char name[ANameSize]; /* text label */
char index[ANameSize]; /* index owning the section */
u32int blocksize; /* size of hash buckets in index */
u32int blockbase; /* address of start of on disk index table */
u32int blocks; /* total blocks on disk; some may be unused */
u32int start; /* first bucket in this section */
u32int stop; /* limit of buckets in this section */
};
/*
 * externally interesting part of an IEntry
 */
struct IAddr
{
u64int addr; /* unpackientry prints a message when any of the top 8 bits are set */
u16int size; /* uncompressed size */
u8int type; /* type of block */
u8int blocks; /* arena io quanta for Clump + data */
};
/*
 * entries in the index
 * kept in IBuckets in the disk index table,
 * cached in the memory ICache.
 * packientry/unpackientry transfer score, wtime, train and ia only;
 * the remaining fields exist only in memory.
 */
struct IEntry
{
u8int score[VtScoreSize];
IEntry *next; /* next in hash chain */
IEntry *nextdirty; /* next in dirty chain */
u32int wtime; /* last write time */
u16int train; /* relative train containing the most recent ref; 0 if no ref, 1 if in same car */
u8int rac; /* read ahead count */
u8int dirty; /* is dirty */
IAddr ia;
};
/*
 * buckets in the on disk index table
 */
struct IBucket
{
u16int n; /* number of active indices */
u32int buck; /* used by buildindex/checkindex only */
u8int *data; /* unpackibucket points this just past the IBucketSize-byte header */
};
/*
 * temporary buffers used by individual threads
 */
struct ZBlock
{
u32int len;
u32int _size;
u8int *data;
u8int *free; /* NOTE(review): presumably the pointer to release, as distinct from data -- confirm */
};
/*
 * simple input buffer for a '\0' terminated text file
 * (runconfig fills one with readifile, reads lines with ifileline,
 * and releases it with freeifile)
 */
struct IFile
{
char *name; /* name of the file */
ZBlock *b; /* entire contents of file */
u32int pos; /* current position in the file */
};
/*
 * description of one statistic; statdesc[] holds one per Stat* index
 */
struct Statdesc
{
char *name;
ulong max; /* NOTE(review): presumably an upper bound used when displaying the statistic -- confirm */
};
/*
 * indices into Stats.n[] and statdesc[]; NStat is the number of statistics.
 */
/* keep in sync with stats.c:/statdesc and httpd.c:/graphname*/
enum
{
StatRpcTotal,
StatRpcRead,
StatRpcReadOk,
StatRpcReadFail,
StatRpcReadBytes,
StatRpcReadTime,
StatRpcReadCached,
StatRpcReadCachedTime,
StatRpcReadUncached,
StatRpcReadUncachedTime,
StatRpcWrite,
StatRpcWriteNew,
StatRpcWriteOld,
StatRpcWriteFail,
StatRpcWriteBytes,
StatRpcWriteTime,
StatRpcWriteNewTime,
StatRpcWriteOldTime,
StatLcacheHit,
StatLcacheMiss,
StatLcacheRead,
StatLcacheWrite,
StatLcacheSize,
StatLcacheStall,
StatLcacheReadTime,
StatDcacheHit,
StatDcacheMiss,
StatDcacheLookup,
StatDcacheRead,
StatDcacheWrite,
StatDcacheDirty,
StatDcacheSize,
StatDcacheFlush,
StatDcacheStall,
StatDcacheLookupTime,
StatDblockStall,
StatLumpStall,
StatIcacheHit,
StatIcacheMiss,
StatIcacheRead,
StatIcacheWrite,
StatIcacheFill,
StatIcachePrefetch,
StatIcacheDirty,
StatIcacheSize,
StatIcacheFlush,
StatIcacheStall,
StatIcacheReadTime,
StatBloomHit,
StatBloomMiss,
StatBloomFalseMiss,
StatBloomLookup,
StatBloomOnes,
StatBloomBits,
StatBloomLookupTime,
StatApartRead,
StatApartReadBytes,
StatApartWrite,
StatApartWriteBytes,
StatIsectRead,
StatIsectReadBytes,
StatIsectWrite,
StatIsectWriteBytes,
StatSumRead,
StatSumReadBytes,
NStat
};
/* one description per statistic, indexed by the enum above */
extern Statdesc statdesc[NStat];
/*
 * statistics about the operation of the server
 * mainly for performance monitoring and profiling.
 */
struct Stats
{
ulong now; /* NOTE(review): presumably the time the sample was taken -- confirm */
ulong n[NStat]; /* one counter per Stat* index */
};
/*
 * One output bin filled in by binstats() (declared in fns.h), which
 * takes an array of nbin of these.
 */
struct Statbin
{
uint nsamp; /* NOTE(review): presumably the number of samples that fell into this bin -- confirm */
uint min; /* NOTE(review): presumably the smallest sample in the bin -- confirm */
uint max; /* NOTE(review): presumably the largest sample in the bin -- confirm */
uint avg; /* NOTE(review): presumably the mean of the samples in the bin -- confirm */
};
/*
 * Parameters describing one statistics graph.
 * fn has the same signature as the callback accepted by binstats().
 */
struct Graph
{
long (*fn)(Stats*, Stats*, void*); /* computes a value from two Stats snapshots and arg */
void *arg; /* opaque argument handed to fn */
long t0; /* NOTE(review): presumably start of the time range -- confirm in httpd.c */
long t1; /* NOTE(review): presumably end of the time range -- confirm in httpd.c */
long min; /* NOTE(review): presumably lower bound of the value axis -- confirm */
long max; /* NOTE(review): presumably upper bound of the value axis -- confirm */
long wid; /* NOTE(review): presumably width of the rendered graph -- confirm */
long ht; /* NOTE(review): presumably height of the rendered graph -- confirm */
int fill; /* NOTE(review): presumably selects filled vs. line rendering -- confirm */
};
/*
 * for kicking background processes that run one round after another after another
 *
 * Set up with initround(); requesters call kickround() or
 * delaykickround(); the worker blocks in waitforkick(); a helper proc
 * runs delaykickroundproc() (see the disk cache for an example).
 * The meaning of the individual counters is defined by round.c.
 */
struct Round
{
QLock lock;
Rendez start;
Rendez finish;
Rendez delaywait;
int delaytime; /* NOTE(review): presumably the delay passed as initround's last argument -- confirm */
int delaykick;
char* name; /* NOTE(review): presumably the label passed to initround -- confirm */
int last;
int current;
int next;
int doanother;
};
/*
 * Bloom filter of stored block hashes
 *
 * NOTE(review): the lock comments below mention nbits, tab, mb and nb,
 * none of which are fields of this structure; they appear to be left
 * over from an earlier layout -- confirm what each lock guards.
 */
struct Bloom
{
RWLock lk; /* protects nhash, nbits, tab, mb */
QLock mod; /* one marker at a time, protects nb */
int nhash; /* number of hash functions applied per score */
ulong size; /* bytes in tab */
ulong mask; /* to produce index */
u8int *data; /* the filter's bit array */
Part *part; /* partition the filter is stored on */
Channel *writechan;
Channel *writedonechan;
};
/*
 * Global state shared by the venti server and its formatting/checking
 * tools.
 */
extern Index *mainindex;
extern u32int maxblocksize; /* max. block size used by any partition */
extern int paranoid; /* should verify hashes on disk read */
extern int queuewrites; /* put all lump writes on a queue and finish later */
extern int readonly; /* only allowed to read the disk data */
extern Stats stats;
extern u8int zeroscore[VtScoreSize];
extern int compressblocks;
extern int writestodevnull; /* dangerous - for performance debugging */
extern int collectstats;
extern QLock memdrawlock;
extern int icachesleeptime;
extern int arenasumsleeptime;
/*
 * When not building under PLAN9PORT: declare the %V print verb's
 * argument type to the compiler's format checker and define ODIRECT
 * as 0 so that or-ing it into an open mode has no effect.
 */
#ifndef PLAN9PORT
#pragma varargck type "V" uchar*
#define ODIRECT 0
#endif

816
src/cmd/venti/srv/dcache.c Normal file
View File

@ -0,0 +1,816 @@
/*
* Disk cache.
*
* Caches raw disk blocks. Getdblock() gets a block, putdblock puts it back.
* Getdblock has a mode parameter that determines i/o and access to a block:
* if mode is OREAD or ORDWR, it is read from disk if not already in memory.
* If mode is ORDWR or OWRITE, it is locked for exclusive use before being returned.
* It is *not* marked dirty -- once changes have been made, they should be noted
* by using dirtydblock() before putdblock().
*
* There is a global cache lock as well as a lock on each block.
* Within a thread, the cache lock can be acquired while holding a block lock,
* but not vice versa; and a block cannot be locked if you already hold the lock
* on another block.
*
* The flush proc writes out dirty blocks in batches, one batch per dirty tag.
* For example, the DirtyArena blocks are all written to disk before any of the
* DirtyArenaCib blocks.
*
* This code used to be in charge of flushing the dirty index blocks out to
* disk, but updating the index turned out to benefit from extra care.
* Now cached index blocks are never marked dirty. The index.c code takes
* care of updating them behind our back, and uses _getdblock to update any
* cached copies of the blocks as it changes them on disk.
*/
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
typedef struct DCache DCache;
/*
 * Geometry of the address hash table (dcache.heads):
 * HashSize buckets, selected by the HashLog-bit value pbhash() returns.
 */
enum
{
HashLog = 9,
HashSize = 1<<HashLog,
HashMask = HashSize - 1,
};
/*
 * The disk cache proper; there is a single static instance, dcache.
 */
struct DCache
{
QLock lock; /* guards the hash chains, heap, free list and counters */
RWLock dirtylock; /* must be held to inspect or set b->dirty */
 /* NOTE(review): nothing in this file acquires dirtylock -- confirm whether the rule above is still enforced */
Rendez full; /* slept on when no block can be evicted; tied to lock */
Round round; /* kicks flushproc */
DBlock *free; /* list of never-used block descriptors, linked through next */
u32int now; /* ticks for usage timestamps */
int size; /* max. size of any block; allocated to each block */
DBlock **heads; /* hash table for finding address */
int nheap; /* number of available victims */
DBlock **heap; /* heap for locating victims */
int nblocks; /* number of blocks allocated */
DBlock *blocks; /* array of block descriptors */
DBlock **write; /* array of block pointers to be written */
u8int *mem; /* allocation backing every block's data buffer and the read-ahead buffer */
int ndirty; /* number of dirty blocks */
int maxdirty; /* max. number of dirty blocks */
Channel *ra; /* read-ahead requests (Ra) consumed by raproc */
u8int *rabuf; /* read-ahead buffer, ramax bytes */
u32int ramax; /* capacity of rabuf */
u32int rasize; /* number of valid bytes currently in rabuf */
u64int raaddr; /* partition address of rabuf[0] */
Part *rapart; /* partition rabuf was read from; nil when empty */
AState diskstate; /* arena state as of the last completed flush */
AState state; /* most recent arena state recorded by setdcachestate */
};
/*
 * One read-ahead request, sent on dcache.ra and serviced by raproc.
 */
typedef struct Ra Ra;
struct Ra
{
Part *part; /* partition to read from */
u64int addr; /* address of the block to bring into the cache */
};
/* the one and only disk cache */
static DCache dcache;
/* heap maintenance (victim selection), block eviction and worker procs */
static int downheap(int i, DBlock *b);
static int upheap(int i, DBlock *b);
static DBlock *bumpdblock(void);
static void delheap(DBlock *db);
static void fixheap(int i, DBlock *b);
static void flushproc(void*);
static void writeproc(void*);
static void raproc(void*);
/*
 * Allocate and initialize the disk cache using roughly mem bytes of
 * block storage, then start the flush, delayed-kick and read-ahead
 * procs.  Every block buffer is maxblocksize bytes.  Dies via sysfatal
 * if maxblocksize is unset or mem cannot hold at least two blocks.
 */
void
initdcache(u32int mem)
{
DBlock *b, *last;
u32int nblocks, blocksize;
int i;
u8int *p;
if(mem < maxblocksize * 2)
sysfatal("need at least %d bytes for the disk cache", maxblocksize * 2);
if(maxblocksize == 0)
sysfatal("no max. block size given for disk cache");
blocksize = maxblocksize;
nblocks = mem / blocksize;
dcache.full.l = &dcache.lock;
dcache.nblocks = nblocks;
/* at most two thirds of the cache may be dirty before a flush is forced */
dcache.maxdirty = (nblocks * 2) / 3;
trace(TraceProc, "initialize disk cache with %d blocks of %d bytes, maximum %d dirty blocks\n",
nblocks, blocksize, dcache.maxdirty);
dcache.size = blocksize;
dcache.heads = MKNZ(DBlock*, HashSize);
dcache.heap = MKNZ(DBlock*, nblocks);
dcache.blocks = MKNZ(DBlock, nblocks);
dcache.write = MKNZ(DBlock*, nblocks);
/* one extra block of slack for the alignment below, plus 128 blocks for the read-ahead buffer */
dcache.mem = MKNZ(u8int, (nblocks+1+128) * blocksize);
dcache.ra = chancreate(sizeof(Ra), 0);
last = nil;
/* round the base up to a multiple of blocksize; the mask arithmetic assumes blocksize is a power of two */
p = (u8int*)(((ulong)dcache.mem+blocksize-1)&~(ulong)(blocksize-1));
for(i = 0; i < nblocks; i++){
b = &dcache.blocks[i];
b->data = &p[i * blocksize];
b->heap = TWID32; /* TWID32 marks "not in the heap" */
b->writedonechan = chancreate(sizeof(void*), 1);
/* thread every descriptor onto the free list */
b->next = last;
last = b;
}
/* the read-ahead buffer follows the last block buffer */
dcache.rabuf = &p[i*blocksize];
dcache.ramax = 128*blocksize;
dcache.raaddr = 0;
dcache.rapart = nil;
dcache.free = last;
dcache.nheap = 0;
setstat(StatDcacheSize, nblocks);
initround(&dcache.round, "dcache", 120*1000);
vtproc(flushproc, nil);
vtproc(delaykickroundproc, &dcache.round);
vtproc(raproc, nil);
}
/*
 * Record *a as the current in-memory arena state.  flushproc copies
 * this value to dcache.diskstate once a flush has completed.
 */
void
setdcachestate(AState *a)
{
	char *aname;

	aname = nil;
	if(a->arena)
		aname = a->arena->name;
	trace(TraceBlock, "setdcachestate %s 0x%llux clumps %d", aname, a->aa, a->stats.clumps);

	qlock(&dcache.lock);
	dcache.state = *a;
	qunlock(&dcache.lock);
}
/*
 * Return a copy of the arena state recorded by the most recently
 * completed flush, taken under the cache lock.
 */
AState
diskstate(void)
{
	AState snap;

	qlock(&dcache.lock);
	snap = dcache.diskstate;
	qunlock(&dcache.lock);

	return snap;
}
/*
 * Read-ahead worker: for each request received on dcache.ra, pull the
 * block into the cache (load mode 2) and release it immediately.
 * Requests at or beyond the end of the partition are dropped.
 * The loop ends when recv stops returning 1.
 */
static void
raproc(void *v)
{
	Ra req;

	USED(v);
	for(;;){
		if(recv(dcache.ra, &req) != 1)
			break;
		if(req.addr >= req.part->size)
			continue;
		/* putdblock ignores nil, so a failed read is harmless here */
		putdblock(_getdblock(req.part, req.addr, OREAD, 2));
	}
}
/*
 * Read-ahead heuristic, called by _getdblock on every hit (miss == 0)
 * and miss (miss != 0) for part at addr.
 *
 * NOTE: the unconditional return below disables the whole routine;
 * everything after it is currently dead code, kept for reference.
 *
 * The disabled logic: two consecutive misses one block apart (in either
 * direction) start a sequential run and queue a request for the next
 * block in that direction; a later hit on the block that was requested
 * extends the run by one more block.  Requests are sent with nbsend, so
 * they are dropped rather than blocking when raproc is busy.
 */
void
dreadahead(Part *part, u64int addr, int miss)
{
Ra ra;
static struct {
Part *part;
u64int addr;
} lastmiss;
static struct {
Part *part;
u64int addr;
int dir;
} lastra;
return;
if(miss){
if(lastmiss.part==part && lastmiss.addr==addr-dcache.size){
XRa:
lastra.part = part;
lastra.dir = addr-lastmiss.addr;
lastra.addr = addr+lastra.dir;
ra.part = part;
ra.addr = lastra.addr;
nbsend(dcache.ra, &ra);
}else if(lastmiss.part==part && lastmiss.addr==addr+dcache.size){
/* descending run: step back one block and share the code above */
addr -= dcache.size;
goto XRa;
}
}else{
if(lastra.part==part && lastra.addr==addr){
lastra.addr += lastra.dir;
ra.part = part;
ra.addr = lastra.addr;
nbsend(dcache.ra, &ra);
}
}
if(miss){
lastmiss.part = part;
lastmiss.addr = addr;
}
// fprint(2, "%s %llx %s\n", part->name, addr, miss ? "miss" : "hit");
}
/*
 * Read n bytes at addr in part into buf, going through the read-ahead
 * buffer.
 *
 * If the request lies entirely inside the buffered window it is served
 * from memory.  Otherwise, unless load == 2, the read goes straight to
 * readpart.  With load == 2 (the read-ahead proc) the window is
 * refilled with up to dcache.ramax bytes starting at addr and the
 * request is served from it.
 *
 * Returns 0 on success from the buffer, the result of readpart when it
 * is called directly, and -1 if the refill fails.
 *
 * Two defensive measures are taken while refilling:
 *  - the window is invalidated before readpart overwrites rabuf, so a
 *    failed or partial read cannot leave stale metadata describing
 *    clobbered data;
 *  - if the refill would be shorter than the request (request larger
 *    than the buffer or crossing the end of the partition) the read is
 *    delegated to readpart instead of copying bytes that were never
 *    read.
 */
int
rareadpart(Part *part, u64int addr, u8int *buf, uint n, int load)
{
	uint nn;
	static RWLock ralock;

	rlock(&ralock);
	if(dcache.rapart==part && dcache.raaddr <= addr && addr+n <= dcache.raaddr+dcache.rasize){
		memmove(buf, dcache.rabuf+(addr-dcache.raaddr), n);
		runlock(&ralock);
		return 0;
	}
	if(load != 2 || addr >= part->size){	/* addr >= part->size: let readpart do the error */
		runlock(&ralock);
		return readpart(part, addr, buf, n);
	}

	/* the lock cannot be upgraded in place: drop it and take it exclusively */
	runlock(&ralock);
	wlock(&ralock);
	fprint(2, "raread %s %llx\n", part->name, addr);
	nn = dcache.ramax;
	if(addr+nn > part->size)
		nn = part->size - addr;
	if(nn < n){
		/* the window cannot cover the request; read it directly */
		wunlock(&ralock);
		return readpart(part, addr, buf, n);
	}

	/* rabuf is about to be overwritten: forget the old window first */
	dcache.rapart = nil;
	dcache.rasize = 0;
	if(readpart(part, addr, dcache.rabuf, nn) < 0){
		wunlock(&ralock);
		return -1;
	}
	memmove(buf, dcache.rabuf, n);
	dcache.rapart = part;
	dcache.rasize = nn;
	dcache.raaddr = addr;
	wunlock(&ralock);

	/* the caller accounts for its own n bytes; count only the extra */
	addstat(StatApartReadBytes, nn-n);
	return 0;
}
/*
 * Hash a partition address to a bucket index in [0, HashSize):
 * fold the 64-bit address to 32 bits, multiply by a constant and keep
 * the top HashLog bits of the 32-bit product.
 */
static u32int
pbhash(u64int addr)
{
#define hashit(c) ((((c) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
	u32int folded;

	folded = (u32int)(addr >> 32) ^ (u32int)addr;
	return hashit(folded);
}
/*
 * Public entry point for fetching a cached disk block: calls
 * _getdblock with load == 1 and records read/write/lookup statistics,
 * including the elapsed time in milliseconds.  Returns whatever
 * _getdblock returns (nil on failure).
 */
DBlock*
getdblock(Part *part, u64int addr, int mode)
{
	DBlock *blk;
	uint start, elapsed;

	start = msec();
	blk = _getdblock(part, addr, mode, 1);

	if(mode == ORDWR){
		addstat(StatDcacheRead, 1);
		addstat(StatDcacheWrite, 1);
	}else if(mode == OREAD)
		addstat(StatDcacheRead, 1);
	else if(mode == OWRITE)
		addstat(StatDcacheWrite, 1);

	elapsed = msec() - start;
	addstat2(StatDcacheLookup, 1, StatDcacheLookupTime, elapsed);
	return blk;
}
/*
 * Find or create the cache entry for the block at addr in part and
 * return it locked: read-locked for mode OREAD, write-locked otherwise.
 *
 * load == 0: lookup only; returns nil if the block is not cached.
 * load != 0: on a miss a victim is evicted (sleeping on dcache.full
 *            until one is available) and reused for this address.
 * load == 2: as above, but the dreadahead heuristic is not consulted
 *            and rareadpart is allowed to refill the read-ahead
 *            buffer; this is the value raproc passes.
 *
 * If the cached copy holds fewer than part->blocksize valid bytes the
 * remainder is zero-filled for OWRITE or read from disk otherwise.
 *
 * Returns nil, with no locks held, if the partition's block size
 * exceeds the cache's or if the disk read fails.  The block is not
 * marked dirty; callers use dirtydblock for that.
 */
DBlock*
_getdblock(Part *part, u64int addr, int mode, int load)
{
DBlock *b;
u32int h, size;
trace(TraceBlock, "getdblock enter %s 0x%llux", part->name, addr);
size = part->blocksize;
if(size > dcache.size){
seterr(EAdmin, "block size %d too big for cache with size %d", size, dcache.size);
return nil;
}
h = pbhash(addr);
/*
* look for the block in the cache
*/
//checkdcache();
qlock(&dcache.lock);
again:
for(b = dcache.heads[h]; b != nil; b = b->next){
if(b->part == part && b->addr == addr){
/*
qlock(&stats.lock);
stats.pchit++;
qunlock(&stats.lock);
*/
if(load){
addstat(StatDcacheHit, 1);
if(load != 2 && mode != OWRITE)
dreadahead(part, b->addr, 0);
}
goto found;
}
}
/*
* missed: locate the block with the oldest second to last use.
* remove it from the heap, and fix up the heap.
*/
if(!load){
qunlock(&dcache.lock);
return nil;
}
addstat(StatDcacheMiss, 1);
b = bumpdblock();
if(b == nil){
/* nothing evictable: wait for putdblock/flushproc to wake us, then rescan the chain */
trace(TraceBlock, "all disk cache blocks in use");
addstat(StatDcacheStall, 1);
rsleep(&dcache.full);
addstat(StatDcacheStall, -1);
goto again;
}
assert(!b->dirty);
/*
* the new block has no last use, so assume it happens sometime in the middle
ZZZ this is not reasonable
*/
b->used = (b->used2 + dcache.now) / 2;
/*
* rechain the block on the correct hash chain
*/
b->next = dcache.heads[h];
dcache.heads[h] = b;
if(b->next != nil)
b->next->prev = b;
b->prev = nil;
b->addr = addr;
b->part = part;
/* size 0 marks the buffer as holding no valid data yet */
b->size = 0;
if(load != 2 && mode != OWRITE)
dreadahead(part, b->addr, 1);
found:
/* take a reference and advance the usage stamps while still under the cache lock */
b->ref++;
b->used2 = b->used;
b->used = dcache.now++;
if(b->heap != TWID32)
fixheap(b->heap, b);
/* the cache lock is released before the block lock is taken */
qunlock(&dcache.lock);
//checkdcache();
trace(TraceBlock, "getdblock lock");
addstat(StatDblockStall, 1);
if(mode == OREAD)
rlock(&b->lock);
else
wlock(&b->lock);
addstat(StatDblockStall, -1);
trace(TraceBlock, "getdblock locked");
if(b->size != size){
if(mode == OREAD){
/* filling the buffer needs the write lock; the swap is not atomic, so b->size is examined again below */
addstat(StatDblockStall, 1);
runlock(&b->lock);
wlock(&b->lock);
addstat(StatDblockStall, -1);
}
if(b->size < size){
if(mode == OWRITE)
memset(&b->data[b->size], 0, size - b->size);
else{
trace(TraceBlock, "getdblock readpart %s 0x%llux", part->name, addr);
if(rareadpart(part, addr + b->size, &b->data[b->size], size - b->size, load) < 0){
b->mode = ORDWR; /* so putdblock wunlocks */
putdblock(b);
return nil;
}
trace(TraceBlock, "getdblock readpartdone");
addstat(StatApartRead, 1);
addstat(StatApartReadBytes, size-b->size);
}
}
b->size = size;
if(mode == OREAD){
/* drop back to the read lock the caller asked for */
addstat(StatDblockStall, 1);
wunlock(&b->lock);
rlock(&b->lock);
addstat(StatDblockStall, -1);
}
}
/* remembered so that putdblock knows which kind of lock to release */
b->mode = mode;
trace(TraceBlock, "getdblock exit");
return b;
}
/*
 * Release a block obtained from getdblock/_getdblock.  A nil block is
 * ignored.  The block lock is dropped according to b->mode, then the
 * reference count is decremented under the cache lock; a clean block
 * whose last reference goes away is put in the victim heap (if not
 * already there) and threads waiting for a free block are woken.
 */
void
putdblock(DBlock *b)
{
	int idle;

	if(b == nil)
		return;

	trace(TraceBlock, "putdblock %s 0x%llux", b->part->name, b->addr);

	if(b->mode != OREAD)
		wunlock(&b->lock);
	else
		runlock(&b->lock);

	//checkdcache();
	qlock(&dcache.lock);
	b->ref--;
	idle = b->ref == 0 && !b->dirty;
	if(idle){
		if(b->heap == TWID32)
			upheap(dcache.nheap++, b);
		rwakeupall(&dcache.full);
	}
	qunlock(&dcache.lock);
	//checkdcache();
}
/*
 * Mark a block, held by the caller in ORDWR or OWRITE mode, as dirty
 * with the given dirty tag.  A block that is already dirty must be
 * re-dirtied with the same tag.  The first time a partition gets a
 * dirty block its write channel and writeproc are created.  When the
 * number of dirty blocks reaches dcache.maxdirty a flush round is
 * kicked at once; otherwise a delayed kick is scheduled.
 */
void
dirtydblock(DBlock *b, int dirty)
{
int odirty;
Part *p;
trace(TraceBlock, "dirtydblock enter %s 0x%llux %d from 0x%lux", b->part->name, b->addr, dirty, getcallerpc(&b));
assert(b->ref != 0);
assert(b->mode==ORDWR || b->mode==OWRITE);
/* remember the previous state so ndirty is only bumped on a clean->dirty transition */
odirty = b->dirty;
if(b->dirty)
assert(b->dirty == dirty);
else
b->dirty = dirty;
p = b->part;
/* NOTE(review): this lazy creation happens before dcache.lock is taken -- confirm callers cannot race here */
if(p->writechan == nil){
trace(TraceBlock, "dirtydblock allocwriteproc %s", p->name);
/* XXX hope this doesn't fail! */
p->writechan = chancreate(sizeof(DBlock*), dcache.nblocks);
vtproc(writeproc, p);
}
qlock(&dcache.lock);
if(!odirty){
dcache.ndirty++;
setstat(StatDcacheDirty, dcache.ndirty);
if(dcache.ndirty >= dcache.maxdirty)
kickround(&dcache.round, 0);
else
delaykickround(&dcache.round);
}
qunlock(&dcache.lock);
}
/*
 * remove some block from use and update the free list and counters
 *
 * Returns a descriptor that is on no hash chain and not in the heap,
 * or nil if every block is referenced or dirty (after kicking the
 * flusher).  Called from _getdblock with dcache.lock held.
 */
static DBlock*
bumpdblock(void)
{
DBlock *b;
ulong h;
trace(TraceBlock, "bumpdblock enter");
/* never-used descriptors are handed out first */
b = dcache.free;
if(b != nil){
dcache.free = b->next;
return b;
}
if(dcache.ndirty >= dcache.maxdirty)
kickdcache();
/*
* remove blocks until we find one that is unused
* referenced blocks are left in the heap even though
* they can't be scavenged; this is simply a speed optimization
*/
for(;;){
if(dcache.nheap == 0){
kickdcache();
trace(TraceBlock, "bumpdblock gotnothing");
return nil;
}
b = dcache.heap[0];
delheap(b);
if(!b->ref && !b->dirty)
break;
}
trace(TraceBlock, "bumpdblock bumping %s 0x%llux", b->part->name, b->addr);
/*
* unchain the block
*/
if(b->prev == nil){
/* b claims to be the head of its chain; verify before unlinking */
h = pbhash(b->addr);
if(dcache.heads[h] != b)
sysfatal("bad hash chains in disk cache");
dcache.heads[h] = b->next;
}else
b->prev->next = b->next;
if(b->next != nil)
b->next->prev = b->prev;
return b;
}
/*
 * Remove db from the victim heap, wherever it sits.  The last heap
 * element is moved into db's slot and sifted to its proper place.
 * Does nothing if db is not in the heap (heap index TWID32).
 */
static void
delheap(DBlock *db)
{
	DBlock *tail;

	if(db->heap != TWID32){
		tail = dcache.heap[--dcache.nheap];
		fixheap(db->heap, tail);
		db->heap = TWID32;
	}
}
/*
 * Place b at heap slot i and move it up or down to its correct new
 * location: try sifting up first, and only if it did not move sift
 * it down.
 */
static void
fixheap(int i, DBlock *b)
{
	int at;

	at = upheap(i, b);
	if(at == i)
		downheap(i, b);
}
/*
 * Sift b up from slot i: parents whose key is larger than b's are
 * moved down one level until b's position is found.  Keys are used2
 * stamps taken relative to dcache.now so that counter wrap-around does
 * not disturb the ordering.  Stores b, records its index in b->heap
 * and returns that index.
 */
static int
upheap(int i, DBlock *b)
{
	DBlock *parent;
	u32int now;
	int up;

	now = dcache.now;
	while(i != 0){
		up = (i - 1) >> 1;
		parent = dcache.heap[up];
		if(b->used2 - now >= parent->used2 - now)
			break;
		dcache.heap[i] = parent;
		parent->heap = i;
		i = up;
	}
	dcache.heap[i] = b;
	b->heap = i;
	return i;
}
/*
 * Sift b down from slot i: at each level the child with the smaller
 * key is moved up while its key is smaller than b's.  Keys are used2
 * stamps taken relative to dcache.now.  Stores b, records its index in
 * b->heap and returns that index.
 */
static int
downheap(int i, DBlock *b)
{
	DBlock *child;
	u32int now;
	int c;

	now = dcache.now;
	while((c = (i << 1) + 1) < dcache.nheap){
		/* pick the smaller of the two children, if there are two */
		if(c + 1 < dcache.nheap && dcache.heap[c]->used2 - now > dcache.heap[c + 1]->used2 - now)
			c++;
		child = dcache.heap[c];
		if(b->used2 - now <= child->used2 - now)
			break;
		dcache.heap[i] = child;
		child->heap = i;
		i = c;
	}
	dcache.heap[i] = b;
	b->heap = i;
	return i;
}
/*
 * Consistency check used by checkdcache: walk the hash chain that
 * bb's address maps to, verifying every prev link on the way, and die
 * via sysfatal if a link is wrong or bb is not on the chain.
 */
static void
findblock(DBlock *bb)
{
	DBlock *cur, *expect;
	int h;

	h = pbhash(bb->addr);
	expect = nil;
	cur = dcache.heads[h];
	while(cur != nil){
		if(cur->prev != expect)
			sysfatal("bad prev link");
		if(cur == bb)
			return;
		expect = cur;
		cur = cur->next;
	}
	sysfatal("block missing from hash table");
}
void
checkdcache(void)
{
DBlock *b;
u32int size, now;
int i, k, refed, nfree;
qlock(&dcache.lock);
size = dcache.size;
now = dcache.now;
for(i = 0; i < dcache.nheap; i++){
if(dcache.heap[i]->heap != i)
sysfatal("dc: mis-heaped at %d: %d", i, dcache.heap[i]->heap);
if(i > 0 && dcache.heap[(i - 1) >> 1]->used2 - now > dcache.heap[i]->used2 - now)
sysfatal("dc: bad heap ordering");
k = (i << 1) + 1;
if(k < dcache.nheap && dcache.heap[i]->used2 - now > dcache.heap[k]->used2 - now)
sysfatal("dc: bad heap ordering");
k++;
if(k < dcache.nheap && dcache.heap[i]->used2 - now > dcache.heap[k]->used2 - now)
sysfatal("dc: bad heap ordering");
}
refed = 0;
for(i = 0; i < dcache.nblocks; i++){
b = &dcache.blocks[i];
if(b->data != &dcache.mem[i * size])
sysfatal("dc: mis-blocked at %d", i);
if(b->ref && b->heap == TWID32)
refed++;
if(b->addr)
findblock(b);
if(b->heap != TWID32
&& dcache.heap[b->heap] != b)
sysfatal("dc: spurious heap value");
}
nfree = 0;
for(b = dcache.free; b != nil; b = b->next){
if(b->addr != 0 || b->heap != TWID32)
sysfatal("dc: bad free list");
nfree++;
}
if(dcache.nheap + nfree + refed != dcache.nblocks)
sysfatal("dc: missing blocks: %d %d %d", dcache.nheap, refed, dcache.nblocks);
qunlock(&dcache.lock);
}
/*
 * Kick a flush round, passing 1 as kickround's wait argument
 * (contrast kickdcache, which passes 0).
 */
void
flushdcache(void)
{
trace(TraceProc, "flushdcache enter");
kickround(&dcache.round, 1);
trace(TraceProc, "flushdcache exit");
}
/*
 * Kick a flush round, passing 0 as kickround's wait argument.
 */
void
kickdcache(void)
{
kickround(&dcache.round, 0);
}
/*
 * Write out the leading run of blocks in [b, eb) whose dirty tag
 * equals dirty: hand each one to its partition's writeproc, then wait
 * for every one of them to be acknowledged on its writedonechan.
 * Returns the number of blocks in the run.
 */
static int
parallelwrites(DBlock **b, DBlock **eb, int dirty)
{
	DBlock **p, **q;

	/* dispatch phase */
	p = b;
	while(p < eb && (*p)->dirty == dirty){
		assert(b<=p && p<eb);
		sendp((*p)->part->writechan, *p);
		p++;
	}
	q = p;

	/* completion phase */
	for(p = b; p < q; p++){
		assert(b<=p && p<eb);
		recvp((*p)->writedonechan);
	}
	return q - b;
}
/*
 * qsort comparison for arrays of DBlock pointers.
 * Sort first by dirty flag, then by partition, then by address in partition.
 * Returns a negative value, zero or a positive value as qsort requires;
 * zero is returned when all three keys are equal, so an element
 * compared with itself is reported as equal.
 */
static int
writeblockcmp(const void *va, const void *vb)
{
	DBlock *a, *b;

	a = *(DBlock**)va;
	b = *(DBlock**)vb;

	if(a->dirty != b->dirty)
		return a->dirty - b->dirty;
	if(a->part != b->part)
		return a->part < b->part ? -1 : 1;
	if(a->addr < b->addr)
		return -1;
	if(a->addr > b->addr)
		return 1;
	return 0;
}
/*
 * Flush proc: runs forever, one round per kick.  Each round it
 * snapshots the arena state, gathers every dirty block, sorts them
 * with writeblockcmp and writes them out one dirty tag at a time, in
 * increasing tag order, so all blocks of one tag reach the disk before
 * any block of the next.  When the writes are done the snapshot is
 * published as dcache.diskstate and the written blocks become
 * evictable again.
 */
static void
flushproc(void *v)
{
int i, j, n;
ulong t0;
DBlock *b, **write;
AState as;
USED(v);
threadsetname("flushproc");
for(;;){
waitforkick(&dcache.round);
trace(TraceWork, "start");
/* capture the state first: it is only published after everything dirty now has been written */
qlock(&dcache.lock);
as = dcache.state;
qunlock(&dcache.lock);
t0 = nsec()/1000;
trace(TraceProc, "build t=%lud", (ulong)(nsec()/1000)-t0);
write = dcache.write;
n = 0;
/* note: b->dirty is examined here without holding dcache.lock */
for(i=0; i<dcache.nblocks; i++){
b = &dcache.blocks[i];
if(b->dirty)
write[n++] = b;
}
qsort(write, n, sizeof(write[0]), writeblockcmp);
/* Write each stage of blocks out. */
trace(TraceProc, "writeblocks t=%lud", (ulong)(nsec()/1000)-t0);
i = 0;
for(j=1; j<DirtyMax; j++){
trace(TraceProc, "writeblocks.%d t=%lud", j, (ulong)(nsec()/1000)-t0);
i += parallelwrites(write+i, write+n, j);
}
/* every collected block must have been consumed by some stage */
if(i != n){
fprint(2, "in flushproc i=%d n=%d\n", i, n);
for(i=0; i<n; i++)
fprint(2, "\tblock %d: dirty=%d\n", i, write[i]->dirty);
abort();
}
/* XXX
* the locking here is suspect. what if a block is redirtied
* after the write happens? we'll still decrement dcache.ndirty here.
*/
trace(TraceProc, "undirty.%d t=%lud", j, (ulong)(nsec()/1000)-t0);
qlock(&dcache.lock);
dcache.diskstate = as;
for(i=0; i<n; i++){
b = write[i];
--dcache.ndirty;
/* putdblock skipped the heap while the block was dirty; add it now if nobody holds it */
if(b->ref == 0 && b->heap == TWID32){
upheap(dcache.nheap++, b);
rwakeupall(&dcache.full);
}
}
setstat(StatDcacheDirty, dcache.ndirty);
qunlock(&dcache.lock);
addstat(StatDcacheFlush, 1);
trace(TraceWork, "finish");
}
}
/*
 * Per-partition writer proc, started by dirtydblock with the Part as
 * its argument.  Receives blocks on the partition's writechan, writes
 * each to disk under the block's write lock, clears its dirty flag and
 * acknowledges on the block's writedonechan.  A failed write is only
 * reported on standard error; the block is still marked clean.
 */
static void
writeproc(void *v)
{
DBlock *b;
Part *p;
p = v;
threadsetname("writeproc:%s", p->name);
for(;;){
b = recvp(p->writechan);
trace(TraceWork, "start");
assert(b->part == p);
trace(TraceProc, "wlock %s 0x%llux", p->name, b->addr);
/* exclude readers and writers of the buffer for the duration of the write */
wlock(&b->lock);
trace(TraceProc, "writepart %s 0x%llux", p->name, b->addr);
if(writepart(p, b->addr, b->data, b->size) < 0)
fprint(2, "write error: %r\n"); /* XXX details! */
addstat(StatApartWrite, 1);
addstat(StatApartWriteBytes, b->size);
b->dirty = 0;
wunlock(&b->lock);
trace(TraceProc, "finish %s 0x%llux", p->name, b->addr);
trace(TraceWork, "finish");
/* tell parallelwrites this block is done */
sendp(b->writedonechan, b);
}
}

47
src/cmd/venti/srv/dump.c Normal file
View File

@ -0,0 +1,47 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/*
 * Print a human-readable description of index ix on fd: its header
 * fields, then one line per index section and one line per arena with
 * the half-open range each covers.
 */
void
printindex(int fd, Index *ix)
{
	int k;

	fprint(fd, "index=%s version=%d blocksize=%d tabsize=%d\n",
		ix->name, ix->version, ix->blocksize, ix->tabsize);
	fprint(fd, "\tbuckets=%d div=%d\n", ix->buckets, ix->div);

	k = 0;
	while(k < ix->nsects){
		fprint(fd, "\tsect=%s for buckets [%lld,%lld)\n", ix->smap[k].name, ix->smap[k].start, ix->smap[k].stop);
		k++;
	}

	k = 0;
	while(k < ix->narenas){
		fprint(fd, "\tarena=%s at [%lld,%lld)\n", ix->amap[k].name, ix->amap[k].start, ix->amap[k].stop);
		k++;
	}
}
/*
 * Print a human-readable description of arena partition ap on fd:
 * a header with its parameters followed by one line per arena map
 * entry.
 */
void
printarenapart(int fd, ArenaPart *ap)
{
	int k;

	fprint(fd, "arena partition=%s\n\tversion=%d blocksize=%d arenas=%d\n\tsetbase=%d setsize=%d\n",
		ap->part->name, ap->version, ap->blocksize, ap->narenas, ap->tabbase, ap->tabsize);

	k = 0;
	while(k < ap->narenas){
		fprint(fd, "\tarena=%s at [%lld,%lld)\n", ap->map[k].name, ap->map[k].start, ap->map[k].stop);
		k++;
	}
}
/*
 * Print a human-readable description of arena on fd: name, address
 * range, version and timestamps, whether it is sealed, its score when
 * that is not the zero score, and its clump/storage statistics.
 *
 * All output goes to fd.  (The sealed marker, the line terminator and
 * the score line used to be written to file descriptor 2 regardless of
 * fd, which split the report across two streams.)
 */
void
printarena(int fd, Arena *arena)
{
	fprint(fd, "arena='%s' [%lld,%lld)\n\tversion=%d created=%d modified=%d",
		arena->name, arena->base, arena->base + arena->size + 2 * arena->blocksize,
		arena->version, arena->ctime, arena->wtime);
	/* finish the header line started above */
	if(arena->memstats.sealed)
		fprint(fd, " sealed\n");
	else
		fprint(fd, "\n");
	if(scorecmp(zeroscore, arena->score) != 0)
		fprint(fd, "\tscore=%V\n", arena->score);

	fprint(fd, "\tclumps=%,d compressed clumps=%,d data=%,lld compressed data=%,lld disk storage=%,lld\n",
		arena->memstats.clumps, arena->memstats.cclumps, arena->memstats.uncsize,
		arena->memstats.used - arena->memstats.clumps * ClumpSize,
		arena->memstats.used + arena->memstats.clumps * ClumpInfoSize);
}

View File

@ -0,0 +1,121 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
enum
{
ClumpChunks = 32*1024 /* number of ClumpInfo entries read from an arena directory per batch */
};
/* incremented once per -v flag; values above 1 enable extra output in threadmain */
static int verbose;
/*
 * Return 1 if the two clump directory entries agree in type, size,
 * uncompressed size and score, 0 otherwise.
 */
int
clumpinfoeq(ClumpInfo *c, ClumpInfo *d)
{
	if(c->type != d->type)
		return 0;
	if(c->size != d->size)
		return 0;
	if(c->uncsize != d->uncsize)
		return 0;
	return scorecmp(c->score, d->score)==0;
}
/*
 * Scan the clump directory of arena for entries whose score equals
 * score, reporting each match on standard error, and return the number
 * of matches.  The directory is read ClumpChunks entries at a time.
 * If a directory read fails the error is recorded with seterr and the
 * scan stops; the matches found so far are still returned.
 *
 * (An IEntry local that was zeroed but never used has been removed.)
 */
int
findscore(Arena *arena, uchar *score)
{
	ClumpInfo *ci, *cis;
	u64int a;
	u32int clump;
	int i, n, found;

//ZZZ remove fprint?
	if(arena->memstats.clumps)
		fprint(2, "reading directory for arena=%s with %d entries\n", arena->name, arena->memstats.clumps);

	cis = MKN(ClumpInfo, ClumpChunks);
	found = 0;
	a = 0;	/* running offset of the current clump: sum of size+ClumpSize of all earlier clumps */
	for(clump = 0; clump < arena->memstats.clumps; clump += n){
		n = ClumpChunks;
		if(n > arena->memstats.clumps - clump)
			n = arena->memstats.clumps - clump;
		if(readclumpinfos(arena, clump, cis, n) != n){
			seterr(EOk, "arena directory read failed: %r");
			break;
		}

		for(i = 0; i < n; i++){
			ci = &cis[i];
			if(scorecmp(score, ci->score)==0){
				fprint(2, "found at clump=%d with type=%d size=%d csize=%d position=%lld\n",
					clump + i, ci->type, ci->uncsize, ci->size, a);
				found++;
			}
			a += ci->size + ClumpSize;
		}
	}
	free(cis);
	return found;
}
/*
 * Print the command synopsis and terminate all threads.  A non-empty
 * exit string is used so that a usage error is reported to the caller
 * as a failure rather than as success.
 */
void
usage(void)
{
	fprint(2, "usage: findscore [-v] arenafile score\n");
	threadexitsall("usage");
}
/*
 * findscore: open the arena partition in argv[0] read-only, search
 * the clump directory of every arena for the score given in argv[1],
 * and print the total number of occurrences on standard output.
 * With -v given more than once the partition layout and the statistics
 * are printed on standard error as well.
 */
void
threadmain(int argc, char *argv[])
{
ArenaPart *ap;
Part *part;
char *file;
u8int score[VtScoreSize];
int i, found;
ventifmtinstall();
ARGBEGIN{
case 'v':
verbose++;
break;
default:
usage();
break;
}ARGEND
/* this tool must never modify the partition */
readonly = 1;
if(argc != 2)
usage();
file = argv[0];
if(strscore(argv[1], score) < 0)
sysfatal("bad score %s\n", argv[1]);
part = initpart(file, OREAD|ODIRECT);
if(part == nil)
sysfatal("can't open partition %s: %r", file);
ap = initarenapart(part);
if(ap == nil)
sysfatal("can't initialize arena partition in %s: %r", file);
if(verbose > 1){
printarenapart(2, ap);
fprint(2, "\n");
}
/* a small disk cache: room for eight maximum-size blocks */
initdcache(8 * MaxDiskBlock);
found = 0;
for(i = 0; i < ap->narenas; i++)
found += findscore(ap->arenas[i], score);
print("found %d occurrences of %V\n", found, score);
if(verbose > 1)
printstats();
threadexitsall(0);
}

View File

@ -0,0 +1,135 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
#ifndef ODIRECT
#define ODIRECT 0
#endif
/*
 * Print the command synopsis and terminate all threads.  A non-empty
 * exit string is used so that a usage error is reported to the caller
 * as a failure, matching the "usage" status this program already uses
 * for an oversized block size.
 */
void
usage(void)
{
	fprint(2, "usage: fmtarenas [-Z] [-b blocksize] [-a arenasize] name file\n");
	threadexitsall("usage");
}
/*
 * fmtarenas: format the partition `file' as an arena partition and
 * carve it into arenas of (at most) arenasize bytes named name0,
 * name1, ...
 *
 * Options handled below: -a arenasize, -b blocksize, -4 (use arena
 * version 4), -Z (do not zero the partition), -D arg (passed to
 * settrace).  Unless -Z is given, the partition is zeroed first only
 * for version 4 arenas.
 *
 * Changes from the previous revision: an arena size of zero is
 * rejected as a usage error instead of dividing by zero; the "can't
 * make new arena" message is newline-terminated; freearena is only
 * called for an arena that was actually created.
 */
void
threadmain(int argc, char *argv[])
{
	int vers;
	ArenaPart *ap;
	Part *part;
	Arena *arena;
	u64int addr, limit, asize, apsize;
	char *file, *name, aname[ANameSize];
	int i, n, blocksize, tabsize, zero;

	ventifmtinstall();
	statsinit();

	blocksize = 8 * 1024;
	asize = 512 * 1024 *1024;
	tabsize = 512 * 1024;		/* BUG: should be determine from number of arenas */
	zero = -1;			/* -1: not specified on the command line */
	vers = ArenaVersion5;
	ARGBEGIN{
	case 'D':
		settrace(EARGF(usage()));
		break;
	case 'a':
		asize = unittoull(ARGF());
		/* zero would make the arena count computation divide by zero */
		if(asize == TWID64 || asize == 0)
			usage();
		break;
	case 'b':
		blocksize = unittoull(ARGF());
		if(blocksize == ~0)
			usage();
		if(blocksize > MaxDiskBlock){
			fprint(2, "block size too large, max %d\n", MaxDiskBlock);
			threadexitsall("usage");
		}
		break;
	case '4':
		vers = ArenaVersion4;
		break;
	case 'Z':
		zero = 0;
		break;
	default:
		usage();
		break;
	}ARGEND

	/* default zeroing policy depends on the arena version */
	if(zero == -1){
		if(vers == ArenaVersion4)
			zero = 1;
		else
			zero = 0;
	}

	if(argc != 2)
		usage();

	name = argv[0];
	file = argv[1];

	if(nameok(name) < 0)
		sysfatal("illegal name template %s", name);

	part = initpart(file, ORDWR|ODIRECT);
	if(part == nil)
		sysfatal("can't open partition %s: %r", file);

	if(zero)
		zeropart(part, blocksize);

	maxblocksize = blocksize;
	initdcache(20*blocksize);

	ap = newarenapart(part, blocksize, tabsize);
	if(ap == nil)
		sysfatal("can't initialize arena: %r");

	/* number of full-size arenas, plus one more if the remainder is big enough */
	apsize = ap->size - ap->arenabase;
	n = apsize / asize;
	if(apsize - (n * asize) >= MinArenaSize)
		n++;

	fprint(2, "fmtarenas %s: %,d arenas, %,lld bytes storage, %,d bytes for index map\n",
		file, n, apsize, ap->tabsize);

	ap->narenas = n;
	ap->map = MKNZ(AMap, n);
	ap->arenas = MKNZ(Arena*, n);

	addr = ap->arenabase;
	for(i = 0; i < n; i++){
		limit = addr + asize;
		/* the last arena absorbs whatever is left of the partition */
		if(limit >= ap->size || ap->size - limit < MinArenaSize){
			limit = ap->size;
			if(limit - addr < MinArenaSize)
				sysfatal("bad arena set math: runt arena at %lld,%lld %lld\n", addr, limit, ap->size);
		}

		snprint(aname, ANameSize, "%s%d", name, i);

		if(0) fprint(2, "adding arena %s at [%lld,%lld)\n", aname, addr, limit);

		arena = newarena(part, vers, aname, addr, limit - addr, blocksize);
		if(arena == nil)
			fprint(2, "can't make new arena %s: %r\n", aname);
		else
			freearena(arena);

		ap->map[i].start = addr;
		ap->map[i].stop = limit;
		namecp(ap->map[i].name, aname);

		addr = limit;
	}

	if(wbarenapart(ap) < 0)
		fprint(2, "can't write back arena partition header for %s: %r\n", file);

	flushdcache();
	threadexitsall(0);
}

View File

@ -0,0 +1,115 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/* the bloom filter being formatted; filled in and written by threadmain */
Bloom b;
/*
 * Print the command synopsis and terminate all threads.  A non-empty
 * exit string is used so that a usage error is reported to the caller
 * as a failure rather than as success.
 */
void
usage(void)
{
	fprint(2, "usage: fmtbloom [-s size] [-n nblocks | -N nhash] file\n");
	threadexitsall("usage");
}
/*
 * fmtbloom: initialize an empty bloom filter on the partition `file'.
 *
 * -s size     use size bytes instead of the whole partition
 * -n nblocks  expected number of blocks; the filter size and number of
 *             hash functions are derived from it
 * -N nhash    use exactly nhash hash functions
 * -n and -N are mutually exclusive.  The size is limited to
 * MaxBloomSize and rounded down to a power of two.
 *
 * Changes from the previous revision: the summary line reports the
 * size in megabytes, as its "MB" label says (it used to print the byte
 * count); the "-N too large" message is newline-terminated.
 */
void
threadmain(int argc, char *argv[])
{
	Part *part;
	char *file;
	vlong bits, size, size2;
	int nhash;
	vlong nblocks;

	ventifmtinstall();
	statsinit();

	size = 0;
	nhash = nblocks = 0;
	ARGBEGIN{
	case 'n':
		if(nhash || nblocks)
			usage();
		nblocks = unittoull(EARGF(usage()));
		break;
	case 'N':
		if(nhash || nblocks)
			usage();
		nhash = unittoull(EARGF(usage()));
		if(nhash > BloomMaxHash){
			fprint(2, "maximum possible is -N %d\n", BloomMaxHash);
			usage();
		}
		break;
	case 's':
		size = unittoull(ARGF());
		if(size == ~0)
			usage();
		break;
	default:
		usage();
		break;
	}ARGEND

	if(argc != 1)
		usage();

	file = argv[0];

	part = initpart(file, ORDWR|ODIRECT);
	if(part == nil)
		sysfatal("can't open partition %s: %r", file);

	if(size == 0)
		size = part->size;

	if(size < 1024*1024)
		sysfatal("bloom filter too small");

	if(size > MaxBloomSize){
		fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n",
			size, MaxBloomSize);
		size = MaxBloomSize;
	}
	/* round down to a power of two */
	if(size&(size-1)){
		for(size2=1; size2<size; size2*=2)
			;
		size = size2/2;
		fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024);
	}

	if(nblocks){
		/*
		 * no use for more than 32 bits per block
		 * shoot for less than 64 bits per block
		 */
		size2 = size;
		while(size2*8 >= nblocks*64)
			size2 >>= 1;
		if(size2 != size){
			size = size2;
			fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n",
				size/1024/1024);
		}

		/*
		 * optimal is to use ln 2 times as many hash functions as we have bits per blocks.
		 */
		bits = (8*size)/nblocks;
		nhash = bits*7/10;
		if(nhash > BloomMaxHash)
			nhash = BloomMaxHash;
	}
	if(!nhash)
		nhash = BloomMaxHash;
	if(bloominit(&b, size, nil) < 0)
		sysfatal("bloominit: %r");
	b.nhash = nhash;

	/* invert the ln 2 rule above to report the block count this filter suits best */
	bits = nhash*10/7;
	nblocks = (8*size)/bits;
	fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size/1024/1024, nhash, nblocks);

	b.data = vtmallocz(size);
	b.part = part;
	if(writebloom(&b) < 0)
		sysfatal("writing %s: %r", file);
	threadexitsall(0);
}

View File

@ -0,0 +1,120 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/*
 * Print the command synopsis and terminate all threads.  A non-empty
 * exit string is used so that a usage error is reported to the caller
 * as a failure rather than as success.
 */
void
usage(void)
{
	fprint(2, "usage: fmtindex [-a] config\n");
	threadexitsall("usage");
}
/*
 * fmtindex: build the arena map of the index named in the venti
 * configuration file `config' and write the index back.
 *
 * Without -a a new index is created over the configured index
 * sections.  With -a the existing index is loaded and arenas are
 * appended: the arenas already in the index must match the leading
 * arenas of the configuration, slot for slot and address for address.
 *
 * Change from the previous revision: the message printed when wbindex
 * fails now names the index instead of an "arena partition header"
 * (a leftover from fmtarenas).
 */
void
threadmain(int argc, char *argv[])
{
	Config conf;
	Index *ix;
	ArenaPart *ap;
	Arena **arenas;
	AMap *amap;
	u64int addr;
	char *file;
	u32int i, j, n, narenas;
	int add;

	ventifmtinstall();
	statsinit();

	add = 0;
	ARGBEGIN{
	case 'a':
		add = 1;
		break;
	default:
		usage();
		break;
	}ARGEND

	if(argc != 1)
		usage();

	file = argv[0];

	if(runconfig(file, &conf) < 0)
		sysfatal("can't initialize config %s: %r", file);
	if(conf.index == nil)
		sysfatal("no index specified in %s", file);
	if(nameok(conf.index) < 0)
		sysfatal("illegal index name %s", conf.index);

	/* total number of arenas over all configured arena partitions */
	narenas = 0;
	for(i = 0; i < conf.naparts; i++){
		ap = conf.aparts[i];
		narenas += ap->narenas;
	}

	if(add){
		ix = initindex(conf.index, conf.sects, conf.nsects);
		if(ix == nil)
			sysfatal("can't initialize index %s: %r", conf.index);
	}else{
		ix = newindex(conf.index, conf.sects, conf.nsects);
		if(ix == nil)
			sysfatal("can't create new index %s: %r", conf.index);

		n = 0;
		for(i = 0; i < ix->nsects; i++)
			n += ix->sects[i]->blocks;

		if(0) fprint(2, "using %ud buckets of %ud; div=%d\n", ix->buckets, n, ix->div);
	}
	amap = MKNZ(AMap, narenas);
	arenas = MKNZ(Arena*, narenas);

	/* lay the arenas out back to back in index address space, starting at IndexBase */
	addr = IndexBase;
	n = 0;
	for(i = 0; i < conf.naparts; i++){
		ap = conf.aparts[i];
		for(j = 0; j < ap->narenas; j++){
			if(n >= narenas)
				sysfatal("too few slots in index's arena set");

			arenas[n] = ap->arenas[j];
			if(n < ix->narenas){
				/* slot already present in the index: it must be unchanged */
				if(arenas[n] != ix->arenas[n])
					sysfatal("mismatched arenas %s and %s at slot %d\n",
						arenas[n]->name, ix->arenas[n]->name, n);
				amap[n] = ix->amap[n];
				if(amap[n].start != addr)
					sysfatal("mis-located arena %s in index %s\n", arenas[n]->name, ix->name);
				addr = amap[n].stop;
			}else{
				/* new slot: append after the previous arena */
				amap[n].start = addr;
				addr += ap->arenas[j]->size;
				amap[n].stop = addr;
				namecp(amap[n].name, ap->arenas[j]->name);
				if(0) fprint(2, "add arena %s at [%lld,%lld)\n",
					amap[n].name, amap[n].start, amap[n].stop);
			}

			n++;
		}
	}
	if(0){
		fprint(2, "configured index=%s with arenas=%d and storage=%lld\n",
			ix->name, n, addr - IndexBase);
		fprint(2, "\tbitblocks=%d maxdepth=%d buckets=%d\n",
			ix->bitblocks, ix->maxdepth, ix->buckets);
	}
	fprint(2, "fmtindex: %,d arenas, %,d index buckets, %,lld bytes storage\n",
		n, ix->buckets, addr-IndexBase);

	ix->amap = amap;
	ix->arenas = arenas;
	ix->narenas = narenas;

	if(wbindex(ix) < 0)
		fprint(2, "can't write back index header for %s: %r\n", file);

	threadexitsall(0);
}

View File

@ -0,0 +1,83 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/*
 * Print the command synopsis and terminate all threads.  A non-empty
 * exit string is used so that a usage error is reported to the caller
 * as a failure, matching the "usage" status this program already uses
 * for an oversized block size.
 */
void
usage(void)
{
	fprint(2, "usage: fmtisect [-Z] [-b blocksize] name file\n");
	threadexitsall("usage");
}
/*
 * fmtisect: format the partition `file' as an index section called
 * `name'.
 *
 * Options handled below: -b blocksize, -1 (use index section
 * version 1), -Z (do not zero the partition).  Unless -Z is given,
 * the partition is zeroed first only for version 1 sections.
 */
void
threadmain(int argc, char *argv[])
{
int vers;
ISect *is;
Part *part;
char *file, *name;
int blocksize, setsize, zero;
ventifmtinstall();
statsinit();
blocksize = 8 * 1024;
setsize = 512 * 1024;
zero = -1; /* -1: not specified on the command line */
vers = ISectVersion2;
ARGBEGIN{
case 'b':
blocksize = unittoull(ARGF());
if(blocksize == ~0)
usage();
if(blocksize > MaxDiskBlock){
fprint(2, "block size too large, max %d\n", MaxDiskBlock);
threadexitsall("usage");
}
break;
case '1':
vers = ISectVersion1;
break;
case 'Z':
zero = 0;
break;
default:
usage();
break;
}ARGEND
/* default zeroing policy depends on the section version */
if(zero == -1){
if(vers == ISectVersion1)
zero = 1;
else
zero = 0;
}
if(argc != 2)
usage();
name = argv[0];
file = argv[1];
if(nameok(name) < 0)
sysfatal("illegal name %s", name);
part = initpart(file, ORDWR|ODIRECT);
if(part == nil)
sysfatal("can't open partition %s: %r", file);
if(zero)
zeropart(part, blocksize);
is = newisect(part, vers, name, blocksize, setsize);
if(is == nil)
sysfatal("can't initialize new index: %r");
fprint(2, "fmtisect %s: %,d buckets of %,d entries, %,d bytes for index map\n",
file, is->blocks, is->buckmax, setsize);
/* a failed write-back is reported but does not change the exit status */
if(wbisect(is) < 0)
fprint(2, "can't write back index section header for %s: %r\n", file);
threadexitsall(0);
}

206
src/cmd/venti/srv/fns.h Normal file
View File

@ -0,0 +1,206 @@
/*
* sorted by 4,/^$/|sort -bd +1
*/
int addarena(Arena *name);
void addstat(int, int);
void addstat2(int, int, int, int);
ZBlock *alloczblock(u32int size, int zeroed, uint alignment);
Arena *amapitoa(Index *index, u64int a, u64int *aa);
u64int arenadirsize(Arena *arena, u32int clumps);
void arenaupdate(Arena *arena, u32int size, u8int *score);
void backsumarena(Arena *arena);
void binstats(long (*fn)(Stats *s0, Stats *s1, void*), void *arg, long t0, long t1, Statbin *bin, int nbin);
int bloominit(Bloom*, vlong, uchar*);
int bucklook(u8int*, int, u8int*, int);
u32int buildbucket(Index *ix, IEStream *ies, IBucket *ib, uint);
void checkdcache(void);
void checklumpcache(void);
int clumpinfoeq(ClumpInfo *c, ClumpInfo *d);
u32int clumpmagic(Arena *arena, u64int aa);
uint countbits(uint n);
int delarena(Arena *arena);
void delaykickicache(void);
void delaykickround(Round*);
void delaykickroundproc(void*);
void dirtydblock(DBlock*, int);
AState diskstate(void);
void *emalloc(ulong);
void *erealloc(void *, ulong);
char *estrdup(char*);
void *ezmalloc(ulong);
Arena *findarena(char *name);
int flushciblocks(Arena *arena);
void flushdcache(void);
void flushicache(void);
void flushqueue(void);
void fmtzbinit(Fmt *f, ZBlock *b);
void freearena(Arena *arena);
void freearenapart(ArenaPart *ap, int freearenas);
void freeiestream(IEStream *ies);
void freeifile(IFile *f);
void freeisect(ISect *is);
void freeindex(Index *index);
void freepart(Part *part);
void freezblock(ZBlock *b);
DBlock *_getdblock(Part *part, u64int addr, int mode, int load);
DBlock *getdblock(Part *part, u64int addr, int mode);
u32int hashbits(u8int *score, int nbits);
int httpdinit(char *address, char *webroot);
int iaddrcmp(IAddr *ia1, IAddr *ia2);
IEntry* icachedirty(u32int, u32int, u64int);
void icacheclean(IEntry*);
int ientrycmp(const void *vie1, const void *vie2);
char *ifileline(IFile *f);
int ifilename(IFile *f, char *dst);
int ifileu32int(IFile *f, u32int *r);
int inbloomfilter(Bloom*, u8int*);
int indexsect(Index *ix, u8int *score);
int indexsect0(Index *ix, u32int buck);
Arena *initarena(Part *part, u64int base, u64int size, u32int blocksize);
ArenaPart *initarenapart(Part *part);
int initarenasum(void);
void initbloomfilter(Index*);
void initdcache(u32int mem);
void initicache(int bits, int depth);
void initicachewrite(void);
IEStream *initiestream(Part *part, u64int off, u64int clumps, u32int size);
ISect *initisect(Part *part);
Index *initindex(char *name, ISect **sects, int n);
void initlumpcache(u32int size, u32int nblocks);
int initlumpqueues(int nq);
Part* initpart(char *name, int mode);
void initround(Round*, char*, int);
int initventi(char *config, Config *conf);
void insertlump(Lump *lump, Packet *p);
int insertscore(u8int *score, IAddr *ia, int write);
void kickdcache(void);
void kickicache(void);
void kickround(Round*, int wait);
ZBlock *loadclump(Arena *arena, u64int aa, int blocks, Clump *cl, u8int *score, int verify);
DBlock *loadibucket(Index *index, u8int *score, ISect **is, u32int *buck, IBucket *ib);
int loadientry(Index *index, u8int *score, int type, IEntry *ie);
void logerr(int severity, char *fmt, ...);
Lump *lookuplump(u8int *score, int type);
int lookupscore(u8int *score, int type, IAddr *ia, int *rac);
int maparenas(AMap *am, Arena **arenas, int n, char *what);
void markbloomfilter(Bloom*, u8int*);
uint msec(void);
int namecmp(char *s, char *t);
void namecp(char *dst, char *src);
int nameok(char *name);
Arena *newarena(Part *part, u32int, char *name, u64int base, u64int size, u32int blocksize);
ArenaPart *newarenapart(Part *part, u32int blocksize, u32int tabsize);
ISect *newisect(Part *part, u32int vers, char *name, u32int blocksize, u32int tabsize);
Index *newindex(char *name, ISect **sects, int n);
u32int now(void);
int okamap(AMap *am, int n, u64int start, u64int stop, char *what);
int okibucket(IBucket*, ISect*);
int outputamap(Fmt *f, AMap *am, int n);
int outputindex(Fmt *f, Index *ix);
int packarena(Arena *arena, u8int *buf);
int packarenahead(ArenaHead *head, u8int *buf);
int packarenapart(ArenaPart *as, u8int *buf);
void packbloomhead(Bloom*, u8int*);
int packclump(Clump *c, u8int *buf, u32int);
void packclumpinfo(ClumpInfo *ci, u8int *buf);
void packibucket(IBucket *b, u8int *buf, u32int magic);
void packientry(IEntry *i, u8int *buf);
int packisect(ISect *is, u8int *buf);
void packmagic(u32int magic, u8int *buf);
ZBlock *packet2zblock(Packet *p, u32int size);
int parseamap(IFile *f, AMapN *amn);
int parseindex(IFile *f, Index *ix);
void partblocksize(Part *part, u32int blocksize);
int partifile(IFile *f, Part *part, u64int start, u32int size);
void printarenapart(int fd, ArenaPart *ap);
void printarena(int fd, Arena *arena);
void printindex(int fd, Index *ix);
void printstats(void);
void putdblock(DBlock *b);
void putlump(Lump *b);
int queuewrite(Lump *b, Packet *p, int creator, uint ms);
u32int readarena(Arena *arena, u64int aa, u8int *buf, long n);
int readarenamap(AMapN *amn, Part *part, u64int base, u32int size);
Bloom *readbloom(Part*);
int readclumpinfo(Arena *arena, int clump, ClumpInfo *ci);
int readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n);
ZBlock *readfile(char *name);
int readifile(IFile *f, char *name);
Packet *readlump(u8int *score, int type, u32int size, int *cached);
int readpart(Part *part, u64int addr, u8int *buf, u32int n);
int runconfig(char *config, Config*);
int scorecmp(u8int *, u8int *);
void scoremem(u8int *score, u8int *buf, int size);
void setatailstate(AState*);
void setdcachestate(AState*);
void seterr(int severity, char *fmt, ...);
void setstat(int, long);
void settrace(char *type);
u64int sortrawientries(Index *ix, Part *tmp, u64int *tmpoff, Bloom *bloom);
void startbloomproc(Bloom*);
Memimage* statgraph(Graph *g);
void statsinit(void);
int storeclump(Index *index, ZBlock *b, u8int *score, int type, u32int creator, IAddr *ia);
int storeientry(Index *index, IEntry *m);
int strscore(char *s, u8int *score);
int stru32int(char *s, u32int *r);
int stru64int(char *s, u64int *r);
void sumarena(Arena *arena);
int syncarena(Arena *arena, u64int start, u32int n, int zok, int fix);
int syncarenaindex(Index *ix, Arena *arena, u32int clump, u64int a, int fix, int *pflush, int check);
int syncindex(Index *ix, int fix, int mustflushicache, int check);
void trace(char *type, char*, ...);
void traceinit(void);
int u64log2(u64int v);
u64int unittoull(char *s);
int unpackarena(Arena *arena, u8int *buf);
int unpackarenahead(ArenaHead *head, u8int *buf);
int unpackarenapart(ArenaPart *as, u8int *buf);
int unpackbloomhead(Bloom*, u8int*);
int unpackclump(Clump *c, u8int *buf, u32int);
void unpackclumpinfo(ClumpInfo *ci, u8int *buf);
void unpackibucket(IBucket *b, u8int *buf, u32int magic);
void unpackientry(IEntry *i, u8int *buf);
int unpackisect(ISect *is, u8int *buf);
u32int unpackmagic(u8int *buf);
void ventifmtinstall(void);
void vtloghdump(Hio*, VtLog*);
void vtloghlist(Hio*);
int vtproc(void(*)(void*), void*);
int vttypevalid(int type);
void waitforkick(Round*);
int wbarena(Arena *arena);
int wbarenahead(Arena *arena);
int wbarenamap(AMap *am, int n, Part *part, u64int base, u64int size);
int wbarenapart(ArenaPart *ap);
void wbbloomhead(Bloom*);
int wbisect(ISect *is);
int wbindex(Index *ix);
int whackblock(u8int *dst, u8int *src, int ssize);
u64int writeaclump(Arena *a, Clump *c, u8int *clbuf, u64int, u64int*);
u32int writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n);
int writebloom(Bloom*);
int writeclumpinfo(Arena *arean, int clump, ClumpInfo *ci);
int writepng(Hio*, Memimage*);
u64int writeiclump(Index *ix, Clump *c, u8int *clbuf, u64int*);
int writelump(Packet *p, u8int *score, int type, u32int creator, uint ms);
int writepart(Part *part, u64int addr, u8int *buf, u32int n);
int writeqlump(Lump *u, Packet *p, int creator, uint ms);
Packet *zblock2packet(ZBlock *zb, u32int size);
void zeropart(Part *part, int blocksize);
/*
#pragma varargck argpos sysfatal 1
#pragma varargck argpos logerr 2
#pragma varargck argpos SetErr 2
*/
/* scores are compared and copied as VtScoreSize raw bytes */
#define scorecmp(h1,h2) memcmp((h1),(h2),VtScoreSize)
#define scorecp(h1,h2) memmove((h1),(h2),VtScoreSize)
/*
 * typed allocation shorthands built on emalloc/ezmalloc:
 *	MK(t)		one t
 *	MKZ(t)		one t via ezmalloc (presumably zeroed -- confirm in ezmalloc)
 *	MKN(t,n)	array of n t's
 *	MKNZ(t,n)	array of n t's via ezmalloc
 *	MKNA(t,at,n)	one t followed by n trailing at's
 * none of them guard (n)*sizeof(t) against overflow.
 */
#define MK(t) ((t*)emalloc(sizeof(t)))
#define MKZ(t) ((t*)ezmalloc(sizeof(t)))
#define MKN(t,n) ((t*)emalloc((n)*sizeof(t)))
#define MKNZ(t,n) ((t*)ezmalloc((n)*sizeof(t)))
#define MKNA(t,at,n) ((t*)emalloc(sizeof(t) + (n)*sizeof(at)))

202
src/cmd/venti/srv/graph.c Normal file
View File

@ -0,0 +1,202 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/*
 * Graph geometry, in pixels.  Top/Bottom/Left/Right are the margins
 * statgraph leaves around the plot rectangle (the y labels are drawn
 * to the left of it).  Requests whose size is not larger than
 * MinWidth/MinHeight get DefaultWidth/DefaultHeight instead.
 */
enum
{
Top = 1,
Bottom = 1,
Left = 40,
Right = 0,
MinWidth = Left+Right+2,
MinHeight = Top+Bottom+2,
DefaultWidth = Left+Right+500,
DefaultHeight = Top+Bottom+40
};
/* held by statgraph around ginit and all drawing; httpd.c also takes it to free images */
QLock memdrawlock;
/* drawing resources, filled in once by ginit */
static Memsubfont *smallfont;
static Memimage *black;
static Memimage *blue;
static Memimage *red;
static Memimage *lofill[6];
static Memimage *hifill[6];
static Memimage *grid;
/*
 * color pairs: ginit turns fill[2*i] into lofill[i] and
 * fill[2*i+1] into hifill[i].
 */
static ulong fill[] = {
0xFFAAAAFF, 0xBB5D5DFF, /* peach */
DPalegreygreen, DPurpleblue, /* aqua */
DDarkyellow, DYellowgreen, /* yellow */
DMedgreen, DDarkgreen, /* green */
0x00AAFFFF, 0x0088CCFF, /* blue */
0xCCCCCCFF, 0x888888FF, /* grey */
};
/*
 * Allocate a 1x1 RGB24 image filled with color and mark it
 * replicated with a very large clip rectangle, so it can be used
 * as a solid-color source for memimagedraw.
 * Returns nil if the image cannot be allocated.
 */
Memimage*
allocrepl(ulong color)
{
	Memimage *m;

	m = allocmemimage(Rect(0,0,1,1), RGB24);
	if(m == nil)
		return nil;	/* do not fill or flag a nil image */
	memfillcolor(m, color);
	m->flags |= Frepl;
	m->clipr = Rect(-1000000, -1000000, 1000000, 1000000);
	return m;
}
/*
 * One-time setup of the drawing resources shared by all graphs:
 * the label font, the solid colors, and the lofill/hifill pairs
 * built from the fill table.  Later calls do nothing.
 */
static void
ginit(void)
{
	static int initialized;
	int k, npair;

	if(initialized)
		return;
	initialized = 1;

	memimageinit();
	smallfont = openmemsubfont(unsharp("#9/font/lucidasans/lstr.10"));
	black = memblack;
	blue = allocrepl(DBlue);
	red = allocrepl(DRed);
	grid = allocrepl(0x77777777);

	/* use as many color pairs as the table and both arrays can hold */
	npair = nelem(fill)/2;
	if(npair > nelem(lofill))
		npair = nelem(lofill);
	if(npair > nelem(hifill))
		npair = nelem(hifill);
	for(k=0; k<npair; k++){
		lofill[k] = allocrepl(fill[2*k]);
		hifill[k] = allocrepl(fill[2*k+1]);
	}
}
/*
 * Format v into str as a short axis label: plain below 10000,
 * in thousands with a "k" suffix below 10000000, otherwise in
 * millions with an "M" suffix.  Negative values get a leading '-'.
 */
static void
mklabel(char *str, int v)
{
	char *fmt;
	int div;

	if(v < 0){
		*str++ = '-';
		v = -v;
	}
	if(v < 10000){
		fmt = "%d";
		div = 1;
	}else if(v < 10000000){
		fmt = "%dk";
		div = 1000;
	}else{
		fmt = "%dM";
		div = 1000000;
	}
	sprint(str, fmt, v/div);
}
/*
 * Draw the label for n in the small font so that its right
 * edge ends 5 pixels to the left of p.
 */
static void
drawlabel(Memimage *m, Point p, int n)
{
	char text[30];
	Point size, at;

	mklabel(text, n);
	size = memsubfontwidth(smallfont, text);
	at = Pt(p.x-5-size.x, p.y);
	memimagestring(m, at, memblack, ZP, smallfont, text);
}
static int
scalept(int val, int valmin, int valmax, int ptmin, int ptmax)
{
if(val <= valmin)
val = valmin;
if(val >= valmax)
val = valmax;
if(valmax == valmin)
valmax++;
return ptmin + (vlong)(val-valmin)*(ptmax-ptmin)/(valmax-valmin);
}
/*
 * Render the statistic described by g as a bar graph.
 *
 * g->wid, g->ht and g->fill are normalized in place: sizes that
 * are too small get the defaults, the width is capped at the
 * number of bins available, and fill is reduced to a valid index
 * into lofill/hifill.  If g->min/g->max do not describe a usable
 * range, the bounds are taken from the sampled data.
 *
 * Returns a newly allocated image, or nil if the graphics
 * resources or the image itself could not be allocated.
 * memdrawlock is held while drawing and released before return.
 */
Memimage*
statgraph(Graph *g)
{
	int i, lastlo, nbin, x, lo, hi, min, max, first;
	Memimage *m;
	Rectangle r;
	Statbin *b, bin[2000];	/* 32 kB, but whack is worse */

	needstack(8192);	/* double check that bin didn't kill us */

	if(g->wid <= MinWidth)
		g->wid = DefaultWidth;
	if(g->ht <= MinHeight)
		g->ht = DefaultHeight;
	if(g->wid > nelem(bin))
		g->wid = nelem(bin);
	if(g->fill < 0)
		g->fill = ((uint)g->arg>>8)%nelem(lofill);
	/* >=, not >: nelem(lofill) itself is already out of range */
	if(g->fill >= nelem(lofill))
		g->fill %= nelem(lofill);

	nbin = g->wid - (Left+Right);
	binstats(g->fn, g->arg, g->t0, g->t1, bin, nbin);

	/*
	 * compute bounds
	 */
	min = g->min;
	max = g->max;
	if(min < 0 || max <= min){
		min = max = 0;
		first = 1;
		for(i=0; i<nbin; i++){
			b = &bin[i];
			if(b->nsamp == 0)
				continue;
			if(first || b->min < min)
				min = b->min;
			if(first || b->max > max)
				max = b->max;
			first = 0;
		}
	}

	qlock(&memdrawlock);
	ginit();
	/* check the fill images actually used; the arrays themselves are never nil */
	if(smallfont==nil || black==nil || blue==nil || red==nil
	|| hifill[g->fill]==nil || lofill[g->fill]==nil){
		werrstr("graphics initialization failed");
		qunlock(&memdrawlock);
		return nil;
	}

	/* fresh image */
	m = allocmemimage(Rect(0,0,g->wid,g->ht), ABGR32);
	if(m == nil){
		qunlock(&memdrawlock);
		return nil;
	}
	r = Rect(Left, Top, g->wid-Right, g->ht-Bottom);
	memfillcolor(m, DTransparent);

	/* x axis */
	memimagedraw(m, Rect(r.min.x, r.max.y, r.max.x, r.max.y+1), black, ZP, memopaque, ZP, S);

	/* y labels */
	drawlabel(m, r.min, max);
	if(min != 0)
		drawlabel(m, Pt(r.min.x, r.max.y-smallfont->height), min);

	/* actual data */
	lastlo = -1;
	for(i=0; i<nbin; i++){
		b = &bin[i];
		if(b->nsamp == 0)
			continue;
		lo = scalept(b->min, min, max, r.max.y, r.min.y);
		hi = scalept(b->max, min, max, r.max.y, r.min.y);
		x = r.min.x+i;
		hi-=2;
		/* connecting segments between neighboring columns: disabled */
		if(0)
		if(lastlo != -1){
			if(lastlo < lo)
				memimagedraw(m, Rect(x-1, lastlo, x, lo), hifill[g->fill], ZP, memopaque, ZP, S);
			else if(lastlo > lo)
				memimagedraw(m, Rect(x-1, lo, x, lastlo), hifill[g->fill], ZP, memopaque, ZP, S);
		}
		memimagedraw(m, Rect(x, hi, x+1,lo), hifill[g->fill], ZP, memopaque, ZP, S);
		memimagedraw(m, Rect(x, lo, x+1, r.max.y), lofill[g->fill], ZP, memopaque, ZP, S);
		lastlo = lo;
	}

	/* label the most recent bin with its average */
	if(bin[nbin-1].nsamp)
		drawlabel(m, Pt(r.max.x, r.min.y+(Dy(r)-smallfont->height)/2), bin[nbin-1].avg);
	qunlock(&memdrawlock);
	return m;
}

988
src/cmd/venti/srv/httpd.c Normal file
View File

@ -0,0 +1,988 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
#include "xml.h"
typedef struct HttpObj HttpObj;
/* defined in graph.c; taken here around freememimage */
extern QLock memdrawlock;
enum
{
ObjNameSize = 64, /* bytes in an object name, including the terminating NUL */
MaxObjs = 16 /* capacity of the objs table */
};
/*
 * A registered URL handler.  A name ending in '/' is matched as a
 * prefix of the request URI by httpproc; any other name must match exactly.
 */
struct HttpObj
{
char name[ObjNameSize];
int (*f)(HConnect*);
};
/* handler table; the first entry with an empty name marks the end */
static HttpObj objs[MaxObjs];
/* directory served by fromwebdir; nil disables file serving */
static char *webroot;
static void listenproc(void*);
static int estats(HConnect *c);
static int dindex(HConnect *c);
static int xindex(HConnect *c);
static int xlog(HConnect *c);
static int sindex(HConnect *c);
static int hicacheflush(HConnect *c);
static int hdcacheflush(HConnect *c);
static int notfound(HConnect *c);
static int httpdobj(char *name, int (*f)(HConnect*));
static int xgraph(HConnect *c);
static int xset(HConnect *c);
static int fromwebdir(HConnect *c);
int
httpdinit(char *address, char *dir)
{
fmtinstall('D', hdatefmt);
/* fmtinstall('H', httpfmt); */
fmtinstall('U', hurlfmt);
if(address == nil)
address = "tcp!*!http";
webroot = dir;
httpdobj("/stats", estats);
httpdobj("/index", dindex);
httpdobj("/storage", sindex);
httpdobj("/xindex", xindex);
httpdobj("/flushicache", hicacheflush);
httpdobj("/flushdcache", hdcacheflush);
httpdobj("/graph/", xgraph);
httpdobj("/set/", xset);
httpdobj("/log", xlog);
httpdobj("/log/", xlog);
if(vtproc(listenproc, address) < 0)
return -1;
return 0;
}
/*
 * Register handler f under name in the first free slot of objs.
 * Returns 0 on success; -1 if name is nil or too long, is already
 * registered, or the table is full.
 */
static int
httpdobj(char *name, int (*f)(HConnect*))
{
	HttpObj *o;

	if(name == nil || strlen(name) >= ObjNameSize)
		return -1;
	for(o = objs; o < objs+MaxObjs; o++){
		if(o->name[0] != '\0'){
			/* slot in use: reject duplicates, otherwise keep looking */
			if(strcmp(o->name, name) == 0)
				return -1;
			continue;
		}
		strcpy(o->name, name);
		o->f = f;
		return 0;
	}
	return -1;
}
/*
 * Allocate a zeroed HConnect with an empty header buffer.
 * Does not return on allocation failure.
 */
static HConnect*
mkconnect(void)
{
	HConnect *conn;

	conn = mallocz(sizeof *conn, 1);
	if(conn == nil)
		sysfatal("out of memory");
	conn->replog = nil;
	/* header buffer starts out empty: read and stop positions coincide */
	conn->hpos = conn->hstop = conn->header;
	return conn;
}
void httpproc(void*);
/*
 * Listener proc: announce on the address passed in vaddress, then
 * loop accepting calls and starting one httpproc per connection.
 * Returns (ending the loop) if announce or listen fails; a failed
 * accept only drops that call.
 */
static void
listenproc(void *vaddress)
{
HConnect *c;
char *address, ndir[NETPATHLEN], dir[NETPATHLEN];
int ctl, nctl, data;
//sleep(1000); /* let strace find us */
address = vaddress;
ctl = announce(address, dir);
if(ctl < 0){
fprint(2, "venti: httpd can't announce on %s: %r\n", address);
return;
}
if(0) print("announce ctl %d dir %s\n", ctl, dir);
for(;;){
/*
* wait for a call (or an error)
*/
nctl = listen(dir, ndir);
if(0) print("httpd listen %d %s...\n", nctl, ndir);
if(nctl < 0){
fprint(2, "venti: httpd can't listen on %s: %r\n", address);
return;
}
/* NOTE(review): accept is passed the announce ctl, not nctl -- confirm this is intended */
data = accept(ctl, ndir);
if(0) print("httpd accept %d...\n", data);
if(data < 0){
fprint(2, "venti: httpd accept: %r\n");
close(nctl);
continue;
}
if(0) print("httpd close nctl %d\n", nctl);
close(nctl);
/* both Hio directions share the one data fd; httpproc closes it */
c = mkconnect();
hinit(&c->hin, data, Hread);
hinit(&c->hout, data, Hwrite);
/* NOTE(review): the vtproc result is not checked; on failure c and data are never released */
vtproc(httpproc, c);
}
}
void
httpproc(void *v)
{
HConnect *c;
int ok, i, n;
//sleep(1000); /* let strace find us */
c = v;
for(;;){
/*
* No timeout because the signal appears to hit every
* proc, not just us.
*/
if(hparsereq(c, 0) < 0)
break;
ok = -1;
for(i = 0; i < MaxObjs && objs[i].name[0]; i++){
n = strlen(objs[i].name);
if((objs[i].name[n-1] == '/' && strncmp(c->req.uri, objs[i].name, n) == 0)
|| (objs[i].name[n-1] != '/' && strcmp(c->req.uri, objs[i].name) == 0)){
ok = (*objs[i].f)(c);
goto found;
}
}
ok = fromwebdir(c);
found:
if(c->head.closeit)
ok = -1;
hreqcleanup(c);
if(ok < 0)
break;
}
hreqcleanup(c);
close(c->hin.fd);
free(c);
}
/*
 * Return v as a percentage of total, treating a zero total as 1.
 * For v of a million or more the total is divided by 100 instead
 * of multiplying v, which avoids overflowing v*100.
 */
static int
percent(long v, long total)
{
	long hundredth;

	if(total == 0)
		total = 1;
	if(v < 1000*1000)
		return (v * 100) / total;

	hundredth = total / 100;
	if(hundredth == 0)
		hundredth = 1;
	return v / hundredth;
}
/*
 * Parse the request headers and reject anything this server does
 * not handle: methods other than GET and HEAD, and Expect headers.
 * Returns 0 if the request may proceed, -1 if the headers could
 * not be parsed, otherwise whatever hunallowed/hfail return.
 */
static int
preq(HConnect *c)
{
	char *meth;

	if(hparseheaders(c, 0) < 0)
		return -1;

	meth = c->req.meth;
	if(strcmp(meth, "GET") != 0 && strcmp(meth, "HEAD") != 0)
		return hunallowed(c, "GET, HEAD");

	if(c->head.expectother || c->head.expectcont)
		return hfail(c, HExpectFail, nil);
	return 0;
}
/*
 * Validate the request with preq and start a reply of the given
 * content type.  Requests with a nonzero major version get ok
 * headers; HTTP/1.1 replies are sent chunked, and anything else
 * marks the connection to be closed after this reply.
 * Returns 0 on success or preq's negative result.
 */
static int
preqtype(HConnect *c, char *type)
{
	int status;

	if((status = preq(c)) < 0)
		return status;

	if(c->req.vermaj != 0){
		hokheaders(c);
		hprint(&c->hout, "Content-type: %s\r\n", type);
		if(http11(c))
			hprint(&c->hout, "Transfer-Encoding: chunked\r\n");
		hprint(&c->hout, "\r\n");
	}

	if(!http11(c)){
		c->head.closeit = 1;
		return 0;
	}
	hxferenc(&c->hout, 1);
	return 0;
}
/*
 * Start a text/plain reply; see preqtype for the return value.
 */
static int
preqtext(HConnect *c)
{
	char *type;

	type = "text/plain";
	return preqtype(c, type);
}
/*
 * Reply "not found" for the requested URI, after the usual
 * request validation.  Returns preq's negative result if
 * validation fails, otherwise hfail's result.
 */
static int
notfound(HConnect *c)
{
	int status;

	if((status = preq(c)) < 0)
		return status;
	return hfail(c, HNotFound, c->req.uri);
}
/*
 * File name suffix to content type, consulted by fromwebdir;
 * the entry with a nil ext ends the table.
 */
struct {
char *ext;
char *type;
} exttab[] = {
".html", "text/html",
".txt", "text/plain",
".xml", "text/xml",
".png", "image/png",
".gif", "image/gif",
0
};
/*
 * Serve the file named by the request URI from under webroot.
 * URIs containing ".." are refused.  A directory is served as its
 * index.html; if that is itself a directory the request is not found.
 * The content type comes from exttab, defaulting to
 * application/octet-stream.
 * Returns 0 once a file has been opened, even if the reply could
 * not be started; otherwise notfound's result.
 */
static int
fromwebdir(HConnect *c)
{
	char buf[4096], *p, *ext, *type;
	int i, fd, n, defaulted;
	Dir *d;

	if(webroot == nil || strstr(c->req.uri, ".."))
		return notfound(c);
	/* the 20 bytes held back leave room for "/index.html" below */
	snprint(buf, sizeof buf-20, "%s/%s", webroot, c->req.uri+1);
	defaulted = 0;
reopen:
	if((fd = open(buf, OREAD)) < 0)
		return notfound(c);
	d = dirfstat(fd);
	if(d == nil){
		close(fd);
		return notfound(c);
	}
	if(d->mode&DMDIR){
		/* close the directory on both paths; the retry opens a new fd */
		free(d);
		close(fd);
		if(defaulted)
			return notfound(c);
		defaulted = 1;
		strcat(buf, "/index.html");
		goto reopen;
	}
	free(d);

	/* pick the content type from the file name suffix */
	p = buf+strlen(buf);
	type = "application/octet-stream";
	for(i=0; exttab[i].ext; i++){
		ext = exttab[i].ext;
		if(p-strlen(ext) >= buf && strcmp(p-strlen(ext), ext) == 0){
			type = exttab[i].type;
			break;
		}
	}
	if(preqtype(c, type) < 0){
		close(fd);
		return 0;
	}

	/* buf is reused as the copy buffer; the path is no longer needed */
	while((n = read(fd, buf, sizeof buf)) > 0)
		if(hwrite(&c->hout, buf, n) < 0)
			break;
	close(fd);
	hflush(&c->hout);
	return 0;
}
/*
 * Integer variables that xset can report and change by name
 * through /set/name[/value]; the entry with a nil name ends the table.
 */
static struct
{
char *name;
int *p;
} namedints[] =
{
"compress", &compressblocks,
"devnull", &writestodevnull,
"logging", &ventilogging,
"stats", &collectstats,
"icachesleeptime", &icachesleeptime,
"arenasumsleeptime", &arenasumsleeptime,
0
};
/*
 * Handle /set/name[/value]: if name is listed in namedints,
 * optionally assign atoi(value) to the variable, then reply with
 * its current value as text.  Unknown names are not found.
 * Returns 0 on success, preqtext's negative result if the reply
 * could not be started, otherwise notfound's result.
 */
static int
xset(HConnect *c)
{
	int i, nf, r;
	char *f[10], *s;

	/* getfields splits in place, so work on a private copy of the URI */
	s = estrdup(c->req.uri);
	nf = getfields(s+strlen("/set/"), f, nelem(f), 1, "/");
	if(nf < 1)
		goto unknown;
	for(i=0; namedints[i].name; i++){
		if(strcmp(f[0], namedints[i].name) != 0)
			continue;
		if(nf >= 2)
			*namedints[i].p = atoi(f[1]);
		r = preqtext(c);
		if(r >= 0){
			hprint(&c->hout, "%s = %d\n", f[0], *namedints[i].p);
			hflush(&c->hout);
			r = 0;
		}
		free(s);	/* f[] points into s; free only after printing */
		return r;
	}
unknown:
	free(s);
	return notfound(c);
}
/*
 * Handle /stats.  Every statistics line is currently commented
 * out, so the reply is a text/plain response with an empty body.
 * Returns 0, or preqtext's negative result.
 */
static int
estats(HConnect *c)
{
Hio *hout;
int r;
r = preqtext(c);
if(r < 0)
return r;
hout = &c->hout;
/*
hprint(hout, "lump writes=%,ld\n", stats.lumpwrites);
hprint(hout, "lump reads=%,ld\n", stats.lumpreads);
hprint(hout, "lump cache read hits=%,ld\n", stats.lumphit);
hprint(hout, "lump cache read misses=%,ld\n", stats.lumpmiss);
hprint(hout, "clump disk writes=%,ld\n", stats.clumpwrites);
hprint(hout, "clump disk bytes written=%,lld\n", stats.clumpbwrites);
hprint(hout, "clump disk bytes compressed=%,lld\n", stats.clumpbcomp);
hprint(hout, "clump disk reads=%,ld\n", stats.clumpreads);
hprint(hout, "clump disk bytes read=%,lld\n", stats.clumpbreads);
hprint(hout, "clump disk bytes uncompressed=%,lld\n", stats.clumpbuncomp);
hprint(hout, "clump directory disk writes=%,ld\n", stats.ciwrites);
hprint(hout, "clump directory disk reads=%,ld\n", stats.cireads);
hprint(hout, "index disk writes=%,ld\n", stats.indexwrites);
hprint(hout, "index disk reads=%,ld\n", stats.indexreads);
hprint(hout, "index disk bloom filter hits=%,ld %d%% falsemisses=%,ld %d%%\n",
stats.indexbloomhits,
percent(stats.indexbloomhits, stats.indexreads),
stats.indexbloomfalsemisses,
percent(stats.indexbloomfalsemisses, stats.indexreads));
hprint(hout, "bloom filter bits=%,ld of %,ld %d%%\n",
stats.bloomones, stats.bloombits, percent(stats.bloomones, stats.bloombits));
hprint(hout, "index disk reads for modify=%,ld\n", stats.indexwreads);
hprint(hout, "index disk reads for allocation=%,ld\n", stats.indexareads);
hprint(hout, "index block splits=%,ld\n", stats.indexsplits);
hprint(hout, "index cache lookups=%,ld\n", stats.iclookups);
hprint(hout, "index cache hits=%,ld %d%%\n", stats.ichits,
percent(stats.ichits, stats.iclookups));
hprint(hout, "index cache fills=%,ld %d%%\n", stats.icfills,
percent(stats.icfills, stats.iclookups));
hprint(hout, "index cache inserts=%,ld\n", stats.icinserts);
hprint(hout, "disk cache hits=%,ld\n", stats.pchit);
hprint(hout, "disk cache misses=%,ld\n", stats.pcmiss);
hprint(hout, "disk cache reads=%,ld\n", stats.pcreads);
hprint(hout, "disk cache bytes read=%,lld\n", stats.pcbreads);
hprint(hout, "disk cache writes=%,ld\n", stats.dirtydblocks);
hprint(hout, "disk cache writes absorbed=%,ld %d%%\n", stats.absorbedwrites,
percent(stats.absorbedwrites, stats.dirtydblocks));
hprint(hout, "disk cache flushes=%,ld\n", stats.dcacheflushes);
hprint(hout, "disk cache flush writes=%,ld (%,ld per flush)\n",
stats.dcacheflushwrites,
stats.dcacheflushwrites/(stats.dcacheflushes ? stats.dcacheflushes : 1));
hprint(hout, "disk writes=%,ld\n", stats.diskwrites);
hprint(hout, "disk bytes written=%,lld\n", stats.diskbwrites);
hprint(hout, "disk reads=%,ld\n", stats.diskreads);
hprint(hout, "disk bytes read=%,lld\n", stats.diskbreads);
*/
hflush(hout);
return 0;
}
/*
 * Handle /storage: print totals over all arenas of the main index.
 * Arenas holding at least one clump count as active and add to the
 * clump and byte totals; every arena present adds to the total space.
 * Returns 0, or preqtext's negative result.
 */
static int
sindex(HConnect *c)
{
	Hio *hout;
	Index *ix;
	Arena *arena;
	vlong clumps, cclumps, uncsize, used, size;
	int i, r, active;

	r = preqtext(c);
	if(r < 0)
		return r;
	hout = &c->hout;

	ix = mainindex;

	hprint(hout, "index=%s\n", ix->name);

	active = 0;
	clumps = 0;
	cclumps = 0;
	uncsize = 0;
	used = 0;
	size = 0;
	for(i = 0; i < ix->narenas; i++){
		arena = ix->arenas[i];
		/* skip missing arenas entirely; size was previously read through a nil pointer */
		if(arena == nil)
			continue;
		if(arena->memstats.clumps != 0){
			active++;
			clumps += arena->memstats.clumps;
			cclumps += arena->memstats.cclumps;
			uncsize += arena->memstats.uncsize;
			used += arena->memstats.used;
		}
		size += arena->size;
	}
	hprint(hout, "total arenas=%,d active=%,d\n", ix->narenas, active);
	hprint(hout, "total space=%,lld used=%,lld\n", size, used + clumps * ClumpInfoSize);
	hprint(hout, "clumps=%,lld compressed clumps=%,lld data=%,lld compressed data=%,lld\n",
		clumps, cclumps, uncsize, used - clumps * ClumpSize);
	hflush(hout);
	return 0;
}
/*
 * Print a text description of arena on hout: name, partition and
 * address range, version and times, sealed flags, the score when
 * it is not the zero score, and the in-memory and on-disk clump
 * statistics.
 */
static void
darena(Hio *hout, Arena *arena)
{
hprint(hout, "arena='%s' on %s at [%lld,%lld)\n\tversion=%d created=%d modified=%d",
arena->name, arena->part->name, arena->base, arena->base + arena->size + 2 * arena->blocksize,
arena->version, arena->ctime, arena->wtime);
if(arena->memstats.sealed)
hprint(hout, " mem=sealed");
if(arena->diskstats.sealed)
hprint(hout, " disk=sealed");
hprint(hout, "\n");
if(scorecmp(zeroscore, arena->score) != 0)
hprint(hout, "\tscore=%V\n", arena->score);
/* "compressed data" is used minus ClumpSize per clump; "storage" is used plus ClumpInfoSize per clump */
hprint(hout, "\tmem: clumps=%d compressed clumps=%d data=%,lld compressed data=%,lld storage=%,lld\n",
arena->memstats.clumps, arena->memstats.cclumps, arena->memstats.uncsize,
arena->memstats.used - arena->memstats.clumps * ClumpSize,
arena->memstats.used + arena->memstats.clumps * ClumpInfoSize);
hprint(hout, "\tdisk: clumps=%d compressed clumps=%d data=%,lld compressed data=%,lld storage=%,lld\n",
arena->diskstats.clumps, arena->diskstats.cclumps, arena->diskstats.uncsize,
arena->diskstats.used - arena->diskstats.clumps * ClumpSize,
arena->diskstats.used + arena->diskstats.clumps * ClumpInfoSize);
}
/*
 * Handle /flushicache: flush the index cache and say so.
 * Returns 0, or preqtext's negative result (nothing is flushed then).
 */
static int
hicacheflush(HConnect *c)
{
	int status;

	status = preqtext(c);
	if(status >= 0){
		flushicache();
		hprint(&c->hout, "flushed icache\n");
		hflush(&c->hout);
		status = 0;
	}
	return status;
}
/*
 * Handle /flushdcache: flush the disk cache and say so.
 * Returns 0, or preqtext's negative result (nothing is flushed then).
 */
static int
hdcacheflush(HConnect *c)
{
	int status;

	status = preqtext(c);
	if(status >= 0){
		flushdcache();
		hprint(&c->hout, "flushed dcache\n");
		hflush(&c->hout);
		status = 0;
	}
	return status;
}
/*
 * Handle /index: print the main index parameters, its sections,
 * and a description of every arena that holds at least one clump.
 * Returns 0, or preqtext's negative result.
 */
static int
dindex(HConnect *c)
{
	Hio *out;
	Index *ix;
	Arena *a;
	int k, status;

	if((status = preqtext(c)) < 0)
		return status;
	out = &c->hout;
	ix = mainindex;

	hprint(out, "index=%s version=%d blocksize=%d tabsize=%d\n",
		ix->name, ix->version, ix->blocksize, ix->tabsize);
	hprint(out, "\tbuckets=%d div=%d\n", ix->buckets, ix->div);

	for(k = 0; k < ix->nsects; k++)
		hprint(out, "\tsect=%s for buckets [%lld,%lld) buckmax=%d\n", ix->smap[k].name, ix->smap[k].start, ix->smap[k].stop, ix->sects[k]->buckmax);

	for(k = 0; k < ix->narenas; k++){
		a = ix->arenas[k];
		if(a == nil || a->memstats.clumps == 0)
			continue;
		hprint(out, "arena=%s at index [%lld,%lld)\n\t", ix->amap[k].name, ix->amap[k].start, ix->amap[k].stop);
		darena(out, a);
	}
	hflush(out);
	return 0;
}
typedef struct Arg Arg;
/*
 * Argument handed to the graph functions through Graph.arg.
 * Both fields index the n[] array of a Stats.
 */
struct Arg
{
int index; /* statistic being graphed */
int index2; /* second statistic: the total for the pct graphs */
};
/*
 * Graph function: the value of the selected statistic in t.
 */
static long
rawgraph(Stats *s, Stats *t, void *va)
{
	int k;

	USED(s);
	k = ((Arg*)va)->index;
	return t->n[k];
}
/*
 * Graph function: the change in the selected statistic from s to t.
 */
static long
diffgraph(Stats *s, Stats *t, void *va)
{
	int k;

	k = ((Arg*)va)->index;
	return t->n[k] - s->n[k];
}
/*
 * Graph function: statistic index as a percentage of statistic
 * index2, both taken from t.
 */
static long
pctgraph(Stats *s, Stats *t, void *va)
{
	Arg *sel;

	USED(s);
	sel = va;
	return percent(t->n[sel->index], t->n[sel->index2]);
}
/*
 * Graph function: the change in statistic index from s to t as a
 * percentage of the change in statistic index2.
 */
static long
pctdiffgraph(Stats *s, Stats *t, void *va)
{
	Arg *sel;
	long part, whole;

	sel = va;
	part = t->n[sel->index] - s->n[sel->index];
	whole = t->n[sel->index2] - s->n[sel->index2];
	return percent(part, whole);
}
/*
 * Bytes moved over the network according to s: RPC read plus
 * RPC write bytes.
 */
static long
netbw(Stats *s)
{
	/* not exactly right */
	return s->n[StatRpcReadBytes] + s->n[StatRpcWriteBytes];
}
/*
 * Bytes moved to and from disk according to s: arena partition
 * and index section reads and writes, plus checksum reads.
 */
static long
diskbw(Stats *s)
{
	ulong total;

	total = s->n[StatApartReadBytes];
	total += s->n[StatApartWriteBytes];
	total += s->n[StatIsectReadBytes];
	total += s->n[StatIsectWriteBytes];
	total += s->n[StatSumReadBytes];
	return total;
}
/*
 * Total bytes of I/O according to s: network plus disk.
 */
static long
iobw(Stats *s)
{
	long net, disk;

	net = netbw(s);
	disk = diskbw(s);
	return net + disk;
}
/*
 * Graph function: disk bytes moved between s and t.
 */
static long
diskgraph(Stats *s, Stats *t, void *va)
{
	long after, before;

	USED(va);
	after = diskbw(t);
	before = diskbw(s);
	return after - before;
}
/*
 * Graph function: network bytes moved between s and t.
 */
static long
netgraph(Stats *s, Stats *t, void *va)
{
	long after, before;

	USED(va);
	after = netbw(t);
	before = netbw(s);
	return after - before;
}
/*
 * Graph function: total I/O bytes moved between s and t.
 */
static long
iograph(Stats *s, Stats *t, void *va)
{
	long after, before;

	USED(va);
	after = iobw(t);
	before = iobw(s);
	return after - before;
}
/*
 * URL names of the statistics that can be graphed.  findname
 * returns a name's position here and the graph functions use that
 * position to index Stats.n[], so the order matters.
 * NOTE(review): presumably this must stay in step with the Stat*
 * enumeration declared elsewhere -- confirm when adding entries.
 */
static char* graphname[] =
{
"rpctotal",
"rpcread",
"rpcreadok",
"rpcreadfail",
"rpcreadbyte",
"rpcreadtime",
"rpcreadcached",
"rpcreadcachedtime",
"rpcreaduncached",
"rpcreaduncachedtime",
"rpcwrite",
"rpcwritenew",
"rpcwriteold",
"rpcwritefail",
"rpcwritebyte",
"rpcwritetime",
"rpcwritenewtime",
"rpcwriteoldtime",
"lcachehit",
"lcachemiss",
"lcachelookup",
"lcachewrite",
"lcachesize",
"lcachestall",
"lcachelookuptime",
"dcachehit",
"dcachemiss",
"dcachelookup",
"dcacheread",
"dcachewrite",
"dcachedirty",
"dcachesize",
"dcacheflush",
"dcachestall",
"dcachelookuptime",
"dblockstall",
"lumpstall",
"icachehit",
"icachemiss",
"icachelookup",
"icachewrite",
"icachefill",
"icacheprefetch",
"icachedirty",
"icachesize",
"icacheflush",
"icachestall",
"icachelookuptime",
"bloomhit",
"bloommiss",
"bloomfalsemiss",
"bloomlookup",
"bloomones",
"bloombits",
"bloomlookuptime",
"apartread",
"apartreadbyte",
"apartwrite",
"apartwritebyte",
"isectread",
"isectreadbyte",
"isectwrite",
"isectwritebyte",
"sumread",
"sumreadbyte",
};
/*
 * Return the position of s in graphname, or -1 (after
 * complaining on standard error) if it is not listed.
 */
static int
findname(char *s)
{
	int k, found;

	found = -1;
	for(k=0; found < 0 && k<nelem(graphname); k++)
		if(strcmp(graphname[k], s) == 0)
			found = k;
	if(found < 0)
		fprint(2, "no name '%s'\n", s);
	return found;
}
/*
 * Text form of a graph: sample g into 100 bins and print one
 * line per bin on io.
 */
static void
dotextbin(Hio *io, Graph *g)
{
	enum { Ntextbin = 100 };
	Statbin bin[2000], *b;	/* 32 kB, but whack is worse */
	int k;

	needstack(8192);	/* double check that bin didn't kill us */
	binstats(g->fn, g->arg, g->t0, g->t1, bin, Ntextbin);

	hprint(io, "stats\n\n");
	for(k = 0, b = bin; k < Ntextbin; k++, b++)
		hprint(io, "%d: nsamp=%d min=%d max=%d avg=%d\n",
			k, b->nsamp, b->min, b->max, b->avg);
}
/*
 * Handle /graph/name[/option...]: graph the statistic called name
 * (or "*" together with diskbw, iobw or netbw) and reply with a
 * PNG image, or with a text listing if the "text" option is given.
 * Options: t0=, t1=, min=, max=, wid=, ht=, fill=, pct=name,
 * pctdiff=name, diff, text, diskbw, iobw, netbw.
 * Returns 0 on success, -1 if the reply could not be started,
 * otherwise notfound's result.  The private copy of the URI and
 * the image are released on every path.
 */
static int
xgraph(HConnect *c)
{
	char *f[20], *s;
	Hio *hout;
	Memimage *m;
	int i, nf, dotext;
	Graph g;
	Arg arg;

	/* getfields splits in place, so work on a private copy of the URI */
	s = estrdup(c->req.uri);
	if(0) fprint(2, "graph %s\n" ,s);
	memset(&g, 0, sizeof g);
	memset(&arg, 0, sizeof arg);
	nf = getfields(s+strlen("/graph/"), f, nelem(f), 1, "/");
	if(nf < 1)
		goto notfound;

	if((arg.index = findname(f[0])) == -1 && strcmp(f[0], "*") != 0)
		goto notfound;

	g.arg = &arg;
	g.t0 = -120;
	g.t1 = 0;
	g.min = -1;
	g.max = -1;
	g.fn = rawgraph;
	g.wid = -1;
	g.ht = -1;
	dotext = 0;
	g.fill = -1;
	for(i=1; i<nf; i++){
		if(strncmp(f[i], "t0=", 3) == 0)
			g.t0 = atoi(f[i]+3);
		else if(strncmp(f[i], "t1=", 3) == 0)
			g.t1 = atoi(f[i]+3);
		else if(strncmp(f[i], "min=", 4) == 0)
			g.min = atoi(f[i]+4);
		else if(strncmp(f[i], "max=", 4) == 0)
			g.max = atoi(f[i]+4);
		else if(strncmp(f[i], "pct=", 4) == 0){
			if((arg.index2 = findname(f[i]+4)) == -1)
				goto notfound;
			g.fn = pctgraph;
			g.min = 0;
			g.max = 100;
		}else if(strncmp(f[i], "pctdiff=", 8) == 0){
			if((arg.index2 = findname(f[i]+8)) == -1)
				goto notfound;
			g.fn = pctdiffgraph;
			g.min = 0;
			g.max = 100;
		}else if(strcmp(f[i], "diff") == 0)
			g.fn = diffgraph;
		else if(strcmp(f[i], "text") == 0)
			dotext = 1;
		else if(strncmp(f[i], "wid=", 4) == 0)
			g.wid = atoi(f[i]+4);
		else if(strncmp(f[i], "ht=", 3) == 0)
			g.ht = atoi(f[i]+3);
		else if(strncmp(f[i], "fill=", 5) == 0)
			g.fill = atoi(f[i]+5);
		else if(strcmp(f[i], "diskbw") == 0)
			g.fn = diskgraph;
		else if(strcmp(f[i], "iobw") == 0)
			g.fn = iograph;
		else if(strcmp(f[i], "netbw") == 0)
			g.fn = netgraph;
	}

	/*
	 * "*" names no statistic, so arg.index is -1.  Only the
	 * bandwidth graphs ignore arg; any other function would
	 * index the statistics array with -1.
	 */
	if(arg.index < 0 && g.fn != diskgraph && g.fn != iograph && g.fn != netgraph)
		goto notfound;

	if(dotext){
		if(preqtype(c, "text/plain") < 0){
			free(s);
			return -1;
		}
		dotextbin(&c->hout, &g);
		hflush(&c->hout);
		free(s);
		return 0;
	}

	m = statgraph(&g);
	if(m == nil)
		goto notfound;

	if(preqtype(c, "image/png") < 0){
		/* the image is ours to release even though nothing was sent */
		qlock(&memdrawlock);
		freememimage(m);
		qunlock(&memdrawlock);
		free(s);
		return -1;
	}
	hout = &c->hout;
	writepng(hout, m);
	qlock(&memdrawlock);
	freememimage(m);
	qunlock(&memdrawlock);
	hflush(hout);
	free(s);
	return 0;

notfound:
	free(s);
	return notfound(c);
}
/*
 * Reply with the HTML list of log names.
 * Returns 0, or -1 if the reply could not be started.
 */
static int
xloglist(HConnect *c)
{
	Hio *out;

	if(preqtype(c, "text/html") < 0)
		return -1;
	out = &c->hout;
	vtloghlist(out);
	hflush(out);
	return 0;
}
/*
 * Handle /log and /log/name: with no name, list the logs;
 * otherwise dump the named log as HTML.
 * Returns 0 on success, -1 if the reply could not be started,
 * otherwise the result of xloglist or notfound.
 */
static int
xlog(HConnect *c)
{
	char *uri;
	VtLog *log;

	uri = c->req.uri;
	if(strcmp(uri, "/log") == 0 || strcmp(uri, "/log/") == 0)
		return xloglist(c);
	if(strncmp(uri, "/log/", 5) != 0)
		return notfound(c);

	log = vtlogopen(uri + strlen("/log/"), 0);
	if(log == nil)
		return notfound(c);
	if(preqtype(c, "text/html") < 0){
		vtlogclose(log);
		return -1;
	}
	vtloghdump(&c->hout, log);
	vtlogclose(log);
	hflush(&c->hout);
	return 0;
}
/*
 * Handle /xindex: reply with the main index described as XML.
 * Returns 0, or -1 if the reply could not be started.
 */
static int
xindex(HConnect *c)
{
	Hio *out;

	if(preqtype(c, "text/xml") < 0)
		return -1;
	out = &c->hout;
	xmlindex(out, mainindex, "index", 0);
	hflush(out);
	return 0;
}
/*
 * Write indent tab characters to hout.
 */
void
xmlindent(Hio *hout, int indent)
{
	int left;

	for(left = indent; left > 0; left--)
		hputc(hout, '\t');
}
/*
 * Write the string v as the XML attribute tag="v".
 * v is written as is; nothing is escaped.
 */
void
xmlaname(Hio *hout, char *v, char *tag)
{
hprint(hout, " %s=\"%s\"", tag, v);
}
/*
 * Write the score v as the XML attribute tag="score";
 * the zero score is omitted.
 */
void
xmlscore(Hio *hout, u8int *v, char *tag)
{
	if(scorecmp(zeroscore, v) != 0)
		hprint(hout, " %s=\"%V\"", tag, v);
}
/*
 * Write the XML attribute tag="yes" when v is nonzero;
 * write nothing otherwise.
 */
void
xmlsealed(Hio *hout, int v, char *tag)
{
	if(v)
		hprint(hout, " %s=\"yes\"", tag);
}
/*
 * Write v in unsigned decimal as the XML attribute tag="v".
 */
void
xmlu32int(Hio *hout, u32int v, char *tag)
{
hprint(hout, " %s=\"%ud\"", tag, v);
}
/*
 * Write the 64-bit v in unsigned decimal as the XML attribute tag="v".
 */
void
xmlu64int(Hio *hout, u64int v, char *tag)
{
hprint(hout, " %s=\"%llud\"", tag, v);
}
/*
 * Write log l to h as an HTML page.  The chunks are written in
 * ring order, starting with the one after l->w and ending with
 * l->w itself.  A nil log produces a page with an empty body.
 */
void
vtloghdump(Hio *h, VtLog *l)
{
	VtLogChunk *chunk;
	char *title;
	int k;

	title = "&lt;nil&gt;";
	if(l != nil)
		title = l->name;

	fprint(2, "hdump xfer %d\n", h->xferenc);
	hprint(h, "<html><head>\n");
	hprint(h, "<title>Venti Server Log: %s</title>\n", title);
	hprint(h, "</head><body>\n");
	hprint(h, "<b>Venti Server Log: %s</b>\n<p>\n", title);

	if(l != nil){
		chunk = l->w;
		for(k=0; k<l->nchunk; k++){
			/* step to the next chunk, wrapping at the end of the array */
			chunk++;
			if(chunk == l->chunk+l->nchunk)
				chunk = l->chunk;
			hwrite(h, chunk->p, chunk->wp-chunk->p);
		}
	}
	hprint(h, "</body></html>\n");
}
/*
 * qsort comparison for an array of strings: va and vb point at
 * the char* elements being compared.
 */
static int
strpcmp(const void *va, const void *vb)
{
	char *a, *b;

	a = *(char**)va;
	b = *(char**)vb;
	return strcmp(a, b);
}
/*
 * Write an HTML page to h listing the log names in sorted order,
 * each linked to /log/name.
 */
void
vtloghlist(Hio *h)
{
	char **names;
	int k, count;

	hprint(h, "<html><head>\n");
	hprint(h, "<title>Venti Server Logs</title>\n");
	hprint(h, "</head><body>\n");
	hprint(h, "<b>Venti Server Logs</b>\n<p>\n");

	names = vtlognames(&count);
	qsort(names, count, sizeof(names[0]), strpcmp);
	for(k=0; k<count; k++)
		hprint(h, "<a href=\"/log/%s\">%s</a><br>\n", names[k], names[k]);
	vtfree(names);

	hprint(h, "</body></html>\n");
}

348
src/cmd/venti/srv/icache.c Normal file
View File

@ -0,0 +1,348 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
typedef struct ICache ICache;
/*
 * The in-memory index cache: a hash table of IEntry chains keyed
 * by the leading bits of the score.
 */
struct ICache
{
QLock lock; /* locks hash table & all associated data */
Rendez full; /* tied to lock by initicache */
IEntry **heads; /* heads of all the hash chains */
int bits; /* bits to use for indexing heads */
u32int size; /* number of heads; == 1 << bits, should be < entries */
IEntry *base; /* all allocated hash table entries */
u32int entries; /* elements in base */
IEntry *dirty; /* chain of dirty elements */
u32int ndirty; /* incremented by insertscore for each written entry */
u32int maxdirty; /* set to entries/2 by initicache */
u32int unused; /* index of first unused element in base */
u32int stolen; /* last head from which an element was stolen */
Arena *last[4]; /* arenas of the most recent entries loaded from disk, used as a ring */
Arena *lastload; /* arena most recently chosen for preloading by lookupscore */
int nlast; /* count of entries recorded in last; next slot is nlast%nelem(last) */
};
/* the one index cache */
static ICache icache;
static IEntry *icachealloc(IAddr *ia, u8int *score);
/*
* bits is the number of bits in the icache hash table
* depth is the average depth
* memory usage is about (1<<bits) * depth * sizeof(IEntry) + (1<<bits) * sizeof(IEntry*)
*/
void
initicache(int bits, int depth)
{
icache.bits = bits;
icache.size = 1 << bits;
icache.entries = depth * icache.size;
icache.maxdirty = icache.entries/2;
icache.base = MKNZ(IEntry, icache.entries);
icache.heads = MKNZ(IEntry*, icache.size);
icache.full.l = &icache.lock;
setstat(StatIcacheSize, icache.entries);
}
/*
 * Return the top bits bits of the score sc, read as a big-endian
 * 32-bit number.  With bits >= 32 the whole 32-bit value is
 * returned.  With bits <= 0 the result is 0: a table indexed by
 * no bits has a single slot, and shifting by 32 or more is undefined.
 */
u32int
hashbits(u8int *sc, int bits)
{
	u32int v;

	if(bits <= 0)
		return 0;
	/* widen each byte first so sc[0]<<24 is not a shift into int's sign bit */
	v = ((u32int)sc[0] << 24) | ((u32int)sc[1] << 16) | ((u32int)sc[2] << 8) | (u32int)sc[3];
	if(bits < 32)
		v >>= (32 - bits);
	return v;
}
/*
 * Seed the index cache with the clumps of arena, whose first
 * clump has index address aa.  Clump directory entries are read
 * in order and inserted as prefetched (not written) entries;
 * corrupt clumps are skipped but still advance the address.
 * Stops at the first directory entry that cannot be read.
 */
static void
loadarenaclumps(Arena *arena, u64int aa)
{
	ulong clump;
	ClumpInfo ci;
	IAddr ia;

	fprint(2, "seed index cache with arena @%llud, (map %llud), %d clumps\n", arena->base, aa, arena->memstats.clumps);
	clump = 0;
	while(clump < arena->memstats.clumps){
		if(readclumpinfo(arena, clump, &ci) < 0)
			break;
		clump++;

		ia.type = ci.type;
		ia.size = ci.uncsize;
		/* clump header plus data, rounded up to whole arena blocks */
		ia.blocks = (ci.size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
		ia.addr = aa;
		aa += ClumpSize + ci.size;

		if(ia.type == VtCorruptType)
			continue;
		insertscore(ci.score, &ia, 0);
	}
}
/*
ZZZ need to think about evicting the correct IEntry,
and writing back the wtime.
* look up data score in the index cache
* if this fails, pull it in from the disk index table, if it exists.
*
* must be called with the lump for this score locked
*/
/*
 * Find the index address for (score, type).  On success *ia is
 * set to the address and *rac to the entry's rac flag, and 0 is
 * returned; -1 is returned if the score is in neither the cache
 * nor the disk index.  An entry that is found or loaded ends up
 * at the front of its hash chain.
 */
int
lookupscore(u8int *score, int type, IAddr *ia, int *rac)
{
IEntry d, *ie, *last;
u32int h;
u64int aa;
Arena *load;
int i;
uint ms;
load = nil;
aa = 0;
ms = msec();
trace(TraceLump, "lookupscore %V.%d", score, type);
qlock(&icache.lock);
h = hashbits(score, icache.bits);
last = nil;
/* search the hash chain; a hit is unlinked here and relinked at the head below */
for(ie = icache.heads[h]; ie != nil; ie = ie->next){
if(ie->ia.type == type && scorecmp(ie->score, score)==0){
if(last != nil)
last->next = ie->next;
else
icache.heads[h] = ie->next;
addstat(StatIcacheHit, 1);
ie->rac = 1;
trace(TraceLump, "lookupscore incache");
goto found;
}
last = ie;
}
addstat(StatIcacheMiss, 1);
/* the lock is dropped while the disk index is consulted */
qunlock(&icache.lock);
if(loadientry(mainindex, score, type, &d) < 0){
ms = msec() - ms;
addstat2(StatIcacheRead, 1, StatIcacheReadTime, ms);
return -1;
}
addstat(StatIcacheFill, 1);
trace(TraceLump, "lookupscore loaded");
/*
* no one else can load an entry for this score,
* since we have the overall score lock.
*/
qlock(&icache.lock);
/*
* If we notice that all the hits are coming from one arena,
* load the table of contents for that arena into the cache.
*/
ie = icachealloc(&d.ia, score);
/* record this entry's arena in the ring of recent loads */
icache.last[icache.nlast++%nelem(icache.last)] = amapitoa(mainindex, ie->ia.addr, &aa);
aa = ie->ia.addr - aa; /* compute base addr of arena */
/* preload only if every slot holds the same arena and it is not the one loaded last time */
for(i=0; i<nelem(icache.last); i++)
if(icache.last[i] != icache.last[0])
break;
if(i==nelem(icache.last) && icache.lastload != icache.last[0]){
load = icache.last[0];
icache.lastload = load;
}
found:
/* (re)insert at the head of the chain and copy the results out under the lock */
ie->next = icache.heads[h];
icache.heads[h] = ie;
*ia = ie->ia;
*rac = ie->rac;
qunlock(&icache.lock);
/* the preload runs after the lock is released; insertscore takes it again */
if(load){
trace(TraceProc, "preload 0x%llux", aa);
loadarenaclumps(load, aa);
}
ms = msec() - ms;
addstat2(StatIcacheRead, 1, StatIcacheReadTime, ms);
return 0;
}
/*
 * insert a new element in the hash table.
 *
 * score - score of the block
 * ia    - its index address; copied into the cache entry
 * write - nonzero when the entry records a newly written block and
 *         must therefore be marked dirty; zero for prefetched
 *         entries (loadarenaclumps, lreadahead)
 *
 * Always returns 0.  A dirty insert schedules a delayed flush and,
 * once ndirty reaches maxdirty, kicks the writer immediately.
 */
int
insertscore(u8int *score, IAddr *ia, int write)
{
	IEntry *ie;
	u32int h;

	trace(TraceLump, "insertscore enter");
	if(write)
		addstat(StatIcacheWrite, 1);
	else
		addstat(StatIcachePrefetch, 1);

	qlock(&icache.lock);
	h = hashbits(score, icache.bits);
	ie = icachealloc(ia, score);
	if(write){
		icache.ndirty++;
		setstat(StatIcacheDirty, icache.ndirty);
		delaykickicache();
		ie->dirty = 1;
	}
	/* icachealloc returns the entry unlinked; put it at the chain head */
	ie->next = icache.heads[h];
	icache.heads[h] = ie;
	qunlock(&icache.lock);

	/* ndirty is read without the lock here; a stale value only delays or repeats the kick */
	if(write && icache.ndirty >= icache.maxdirty)
		kickicache();

	/*
	 * It's okay not to do this under icache.lock.
	 * Calling insertscore only happens when we hold
	 * the lump, meaning any searches for this block
	 * will hit in the lump cache until after we return.
	 */
	markbloomfilter(mainindex->bloom, score);
	return 0;
}
/*
 * allocate a index cache entry which hasn't been used in a while.
 * must be called with icache.lock locked
 * if the score is already in the table, update the entry.
 *
 * The returned entry is NOT linked into any hash chain; the caller
 * links it at the head of the chain for score.
 *
 * NOTE(review): on the "already in the table" path the entry is
 * unlinked and returned with rac set to 1, but *ia is not copied
 * into it, which does not match "update the entry" above -- confirm
 * which behaviour is intended.
 */
static IEntry *
icachealloc(IAddr *ia, u8int *score)
{
int i;
IEntry *ie, *last, *clean, *lastclean;
u32int h;
h = hashbits(score, icache.bits);
last = nil;
/* first choice: an existing entry for this score and type */
for(ie = icache.heads[h]; ie != nil; ie = ie->next){
if(ie->ia.type == ia->type && scorecmp(ie->score, score)==0){
if(last != nil)
last->next = ie->next;
else
icache.heads[h] = ie->next;
trace(TraceLump, "icachealloc hit");
ie->rac = 1;
return ie;
}
last = ie;
}
/* second choice: a never-used slot from the preallocated pool */
h = icache.unused;
if(h < icache.entries){
ie = &icache.base[h++];
icache.unused = h;
trace(TraceLump, "icachealloc unused");
goto Found;
}
/*
 * last resort: steal a clean entry, scanning the chains round-robin
 * starting just after the chain that was robbed last time.
 */
h = icache.stolen;
for(i=0;; i++){
h++;
if(h >= icache.size)
h = 0;
/* a full pass over every chain found nothing clean */
if(i == icache.size){
trace(TraceLump, "icachealloc sleep");
addstat(StatIcacheStall, 1);
while(icache.ndirty == icache.entries){
/*
 * This is a bit suspect. Kickicache will wake up the
 * icachewritecoord, but if all the index entries are for
 * unflushed disk blocks, icachewritecoord won't be
 * able to do much. It always rewakes everyone when
 * it thinks it is done, though, so at least we'll go around
 * the while loop again. Also, if icachewritecoord sees
 * that the disk state hasn't change at all since the last
 * time around, it kicks the disk. This needs to be
 * rethought, but it shouldn't deadlock anymore.
 */
kickicache();
rsleep(&icache.full);
}
addstat(StatIcacheStall, -1);
i = 0;
}
/*
 * remember the last clean entry on this chain and its predecessor;
 * entries are inserted at the head, so the last one is the one
 * that has gone longest without being moved to the front.
 */
lastclean = nil;
clean = nil;
last = nil;
for(ie=icache.heads[h]; ie; last=ie, ie=ie->next){
if(!ie->dirty){
clean = ie;
lastclean = last;
}
}
if(clean){
if(lastclean)
lastclean->next = clean->next;
else
icache.heads[h] = clean->next;
clean->next = nil;
icache.stolen = h;
ie = clean;
trace(TraceLump, "icachealloc steal");
goto Found;
}
}
Found:
/* recycle the slot for the new score; rac starts at 0 for a fresh entry */
ie->ia = *ia;
scorecp(ie->score, score);
ie->rac = 0;
return ie;
}
/*
 * Collect the dirty cache entries that may be written to disk now.
 *
 * lo, hi - inclusive range of 32-bit score prefixes to select
 * limit  - only entries whose index address is nonzero and below
 *          limit are selected
 *
 * Returns the selected entries chained through nextdirty (nil if
 * there are none).  The entries stay in the cache and stay dirty.
 * When nothing qualifies, flushdcache is called before returning.
 */
IEntry*
icachedirty(u32int lo, u32int hi, u64int limit)
{
	int bucket;
	u32int key;
	IEntry *e, *chain;

	chain = nil;
	trace(TraceProc, "icachedirty enter");
	qlock(&icache.lock);
	for(bucket=0; bucket<icache.size; bucket++){
		for(e = icache.heads[bucket]; e; e = e->next){
			if(!e->dirty || e->ia.addr == 0 || e->ia.addr >= limit)
				continue;
			key = hashbits(e->score, 32);
			if(key < lo || key > hi)
				continue;
			e->nextdirty = chain;
			chain = e;
		}
	}
	qunlock(&icache.lock);
	trace(TraceProc, "icachedirty exit");
	if(chain == nil)
		flushdcache();
	return chain;
}
/*
 * Mark every entry on the nextdirty chain starting at ie as clean,
 * lower the dirty count to match, and wake everyone sleeping on
 * icache.full.  Passing nil performs only the wakeup.
 */
void
icacheclean(IEntry *ie)
{
	/* trace labels name this function (they previously said "icachedirty") */
	trace(TraceProc, "icacheclean enter");
	qlock(&icache.lock);
	for(; ie; ie=ie->nextdirty){
		icache.ndirty--;
		ie->dirty = 0;
	}
	setstat(StatIcacheDirty, icache.ndirty);
	rwakeupall(&icache.full);
	qunlock(&icache.lock);
	trace(TraceProc, "icacheclean exit");
}

View File

@ -0,0 +1,318 @@
/*
* Write the dirty icache entries to disk. Random seeks are
* so expensive that it makes sense to wait until we have
* a lot and then just make a sequential pass over the disk.
*/
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/* forward declarations for the per-section writer, the coordinator, and the merge sort used below */
static void icachewriteproc(void*);
static void icachewritecoord(void*);
static IEntry *iesort(IEntry*);
/* icachewritesect sleeps this long before processing each chunk */
int icachesleeptime = 1000; /* milliseconds */
enum
{
/* largest span of index section covered by one chunk (see nextchunk); also sizes each writer's buffer */
Bufsize = 8*1024*1024
};
typedef struct IWrite IWrite;
struct IWrite
{
/* round used to kick and wait for index cache flushes */
Round round;
/* disk state snapshot taken by icachewritecoord; as.aa limits which entries are written */
AState as;
};
/* state shared by the coordinator and the per-section writers */
static IWrite iwrite;
void
initicachewrite(void)
{
int i;
Index *ix;
initround(&iwrite.round, "icache", 120*60*1000);
ix = mainindex;
for(i=0; i<ix->nsects; i++){
ix->sects[i]->writechan = chancreate(sizeof(ulong), 1);
ix->sects[i]->writedonechan = chancreate(sizeof(ulong), 1);
vtproc(icachewriteproc, ix->sects[i]);
}
vtproc(icachewritecoord, nil);
vtproc(delaykickroundproc, &iwrite.round);
}
/*
 * Cut the next chunk off the front of a sorted dirty list.
 *
 * A chunk is the longest prefix whose buckets all start less than
 * Bufsize bytes after the first entry's bucket.
 *
 * pie   - in: head of the remaining list; out: first entry not in
 *         the chunk (nil when the list is exhausted)
 * paddr - out: partition address of the first bucket in the chunk
 * pnbuf - out: number of bytes spanned by the chunk's buckets
 *
 * Returns the chunk, terminated with a nil nextdirty.
 */
static IEntry*
nextchunk(Index *ix, ISect *is, IEntry **pie, u64int *paddr, uint *pnbuf)
{
	u64int addr, naddr;
	uint span;
	int bsize;
	IEntry *head, *tail, *next;

	bsize = 1<<is->blocklog;
	head = *pie;
	addr = is->blockbase + ((u64int)(hashbits(head->score, 32) / ix->div - is->start) << is->blocklog);
	span = 0;
	tail = head;
	for(next = head->nextdirty; next != nil; next = next->nextdirty){
		naddr = is->blockbase + ((u64int)(hashbits(next->score, 32) / ix->div - is->start) << is->blocklog);
		if(naddr - addr >= Bufsize)
			break;
		span = naddr-addr;
		tail = next;
	}
	/* span is the offset of the last bucket; include that bucket itself */
	span += bsize;
	tail->nextdirty = nil;
	*pie = next;
	*paddr = addr;
	*pnbuf = span;
	return head;
}
/*
 * Write the dirty index cache entries belonging to one index
 * section back to disk.
 *
 * ix  - the index
 * is  - the section to update
 * buf - scratch buffer, large enough for any chunk nextchunk returns
 *
 * The entries are sorted by score and processed in chunks: each
 * chunk's span of buckets is read in one piece, updated in memory,
 * and written back in one piece.  Entries that were written are
 * marked clean; entries whose bucket is corrupt or full are dropped
 * from the chunk and stay dirty.
 *
 * Returns 0 if everything was written, -1 if any read, write, or
 * bucket update failed.
 */
static int
icachewritesect(Index *ix, ISect *is, u8int *buf)
{
	int err, h, bsize;
	u32int lo, hi;
	u64int addr, naddr;
	uint nbuf, off;
	DBlock *b;
	IBucket ib;
	IEntry *ie, *iedirty, **l, *chunk;

	/* range of 32-bit score prefixes that hash into this section */
	lo = is->start * ix->div;
	if(TWID32/ix->div < is->stop)
		hi = TWID32;
	else
		hi = is->stop * ix->div - 1;

	trace(TraceProc, "icachewritesect enter %ud %ud %llud", lo, hi, iwrite.as.aa);

	iedirty = icachedirty(lo, hi, iwrite.as.aa);
	iedirty = iesort(iedirty);
	bsize = 1<<is->blocklog;
	err = 0;

	while(iedirty){
		sleep(icachesleeptime);
		trace(TraceProc, "icachewritesect nextchunk");
		chunk = nextchunk(ix, is, &iedirty, &addr, &nbuf);

		trace(TraceProc, "icachewritesect readpart 0x%llux+0x%ux", addr, nbuf);
		if(readpart(is->part, addr, buf, nbuf) < 0){
			/* XXX the chunk's entries are left dirty */
			fprint(2, "icachewriteproc readpart: %r\n");
			err = -1;
			continue;
		}
		trace(TraceProc, "icachewritesect updatebuf");
		addstat(StatIsectReadBytes, nbuf);
		addstat(StatIsectRead, 1);

		for(l=&chunk; (ie=*l)!=nil; l=&ie->nextdirty){
again:
			naddr = is->blockbase + ((u64int)(hashbits(ie->score, 32) / ix->div - is->start) << is->blocklog);
			off = naddr - addr;
			if(off+bsize > nbuf){
				fprint(2, "whoops! addr=0x%llux nbuf=%ud addr+nbuf=0x%llux naddr=0x%llux\n",
					addr, nbuf, addr+nbuf, naddr);
				assert(off+bsize <= nbuf);
			}
			unpackibucket(&ib, buf+off, is->bucketmagic);
			if(okibucket(&ib, is) < 0){
				fprint(2, "bad bucket XXX\n");
				goto skipit;
			}
			trace(TraceProc, "icachewritesect add %V at 0x%llux", ie->score, naddr);
			/* low bit set: entry already present at byte offset h^1; else h is the insertion offset */
			h = bucklook(ie->score, ie->ia.type, ib.data, ib.n);
			if(h & 1){
				h ^= 1;
				packientry(ie, &ib.data[h]);
			}else if(ib.n < is->buckmax){
				/* open a slot at h by shifting the tail up one entry */
				memmove(&ib.data[h+IEntrySize], &ib.data[h], ib.n*IEntrySize - h);
				ib.n++;
				packientry(ie, &ib.data[h]);
			}else{
				fprint(2, "bucket overflow XXX\n");
skipit:
				/* unlink ie from the chunk so icacheclean will not mark it clean */
				err = -1;
				*l = ie->nextdirty;
				ie = *l;
				if(ie)
					goto again;
				else
					break;
			}
			packibucket(&ib, buf+off, is->bucketmagic);
			/* also copy the updated bucket into the block returned by _getdblock, if any */
			if((b = _getdblock(is->part, naddr, ORDWR, 0)) != nil){
				memmove(b->data, buf+off, bsize);
				putdblock(b);
			}
		}

		/* the format now consumes addr and nbuf, matching the readpart trace */
		trace(TraceProc, "icachewritesect writepart 0x%llux+0x%ux", addr, nbuf);
		if(writepart(is->part, addr, buf, nbuf) < 0){
			/* XXX the chunk's entries are left dirty */
			fprint(2, "icachewriteproc writepart: %r\n");
			err = -1;
			continue;
		}
		addstat(StatIsectWriteBytes, nbuf);
		addstat(StatIsectWrite, 1);
		icacheclean(chunk);
	}

	trace(TraceProc, "icachewritesect done");
	return err;
}
static void
icachewriteproc(void *v)
{
uint bsize;
ISect *is;
Index *ix;
u8int *buf;
ix = mainindex;
is = v;
threadsetname("icachewriteproc:%s", is->part->name);
bsize = 1<<is->blocklog;
buf = emalloc(Bufsize+bsize);
buf = (u8int*)(((ulong)buf+bsize-1)&~(ulong)(bsize-1));
for(;;){
trace(TraceProc, "icachewriteproc recv");
recv(is->writechan, 0);
trace(TraceWork, "start");
icachewritesect(ix, is, buf);
trace(TraceProc, "icachewriteproc send");
trace(TraceWork, "finish");
send(is->writedonechan, 0);
}
}
/*
 * Coordinator proc for index cache flushes.
 *
 * Each time the round is kicked it snapshots the disk state, starts
 * every section writer (and the bloom filter writer, if there is
 * one), waits for all of them to finish, and then records the
 * snapshot with setatailstate.  If the disk state has not moved
 * since the previous flush, the disk cache is kicked first, since
 * this flush could not otherwise do more than the last one.
 */
static void
icachewritecoord(void *v)
{
	int i;
	Index *ix;
	AState now;

	USED(v);
	threadsetname("icachewritecoord");

	ix = mainindex;
	iwrite.as = diskstate();

	for(;;){
		trace(TraceProc, "icachewritecoord sleep");
		waitforkick(&iwrite.round);
		trace(TraceWork, "start");

		now = diskstate();
		if(now.arena==iwrite.as.arena && now.aa==iwrite.as.aa){
			/* will not be able to do anything more than last flush - kick disk */
			trace(TraceProc, "icachewritecoord flush dcache");
			kickdcache();
			trace(TraceProc, "icachewritecoord flushed dcache");
		}
		iwrite.as = now;

		trace(TraceProc, "icachewritecoord start flush");
		if(iwrite.as.arena){
			/* start all writers before waiting on any of them */
			for(i=0; i<ix->nsects; i++)
				send(ix->sects[i]->writechan, 0);
			if(ix->bloom)
				send(ix->bloom->writechan, 0);

			for(i=0; i<ix->nsects; i++)
				recv(ix->sects[i]->writedonechan, 0);
			if(ix->bloom)
				recv(ix->bloom->writedonechan, 0);

			trace(TraceProc, "icachewritecoord donewrite");
			setatailstate(&iwrite.as);
		}

		icacheclean(nil);	/* wake up anyone waiting */
		trace(TraceWork, "finish");
		addstat(StatIcacheFlush, 1);
	}
}
/*
 * Request a flush of the index cache by kicking the icache write
 * round with a second argument of 1.
 * NOTE(review): presumably the 1 makes kickround wait for the round
 * to finish (kickicache passes 0) -- confirm against kickround.
 */
void
flushicache(void)
{
trace(TraceProc, "flushicache enter");
kickround(&iwrite.round, 1);
trace(TraceProc, "flushicache exit");
}
/*
 * Kick the icache write round with a second argument of 0.
 * Called from insertscore when the dirty count reaches maxdirty and
 * from icachealloc when every entry is dirty.
 */
void
kickicache(void)
{
kickround(&iwrite.round, 0);
}
/*
 * Ask for a delayed kick of the icache write round.
 * Called by insertscore for every dirty insert.
 */
void
delaykickicache(void)
{
delaykickround(&iwrite.round);
}
/*
 * Merge sort a list of entries linked through nextdirty into
 * ascending score order (scorecmp).  Entries with equal scores keep
 * their relative order.  Returns the new head of the list.
 */
static IEntry*
iesort(IEntry *ie)
{
	IEntry *slow, *fast, *a, *b, *sorted, **tailp;

	if(ie == nil || ie->nextdirty == nil)
		return ie;

	/* split: slow advances one node for every two that fast advances */
	slow = ie;
	fast = ie->nextdirty->nextdirty;
	while(fast != nil){
		slow = slow->nextdirty;
		fast = fast->nextdirty;
		if(fast != nil)
			fast = fast->nextdirty;
	}
	b = slow->nextdirty;
	slow->nextdirty = nil;

	/* sort each half */
	a = iesort(ie);
	b = iesort(b);

	/* merge, preferring the first half on ties */
	sorted = nil;
	tailp = &sorted;
	while(a != nil && b != nil){
		if(scorecmp(a->score, b->score) > 0){
			*tailp = b;
			tailp = &b->nextdirty;
			b = b->nextdirty;
		}else{
			*tailp = a;
			tailp = &a->nextdirty;
			a = a->nextdirty;
		}
	}
	/* whichever half remains is already sorted and nil-terminated */
	*tailp = a != nil ? a : b;
	return sorted;
}

93
src/cmd/venti/srv/ifile.c Normal file
View File

@ -0,0 +1,93 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/*
 * Load the named file with readfile and point f at its start.
 * f->name refers to the caller's name string; it is not copied.
 * Returns 0 on success, -1 if the file cannot be read.
 */
int
readifile(IFile *f, char *name)
{
	ZBlock *contents;

	if((contents = readfile(name)) == nil)
		return -1;
	f->pos = 0;
	f->b = contents;
	f->name = name;
	return 0;
}
/*
 * Release the buffer behind f and reset it to empty.
 * f itself and f->name are left alone.
 */
void
freeifile(IFile *f)
{
	ZBlock *contents;

	contents = f->b;
	f->b = nil;
	f->pos = 0;
	freezblock(contents);
}
/*
 * Fill f with size bytes read from part starting at offset start.
 * f->name refers to part->name; it is not copied.
 * Returns 0 on success; on failure returns -1, and a failed read
 * also sets the error string.
 */
int
partifile(IFile *f, Part *part, u64int start, u32int size)
{
	ZBlock *blk;

	blk = alloczblock(size, 0, part->blocksize);
	if(blk == nil)
		return -1;
	if(readpart(part, start, blk->data, size) >= 0){
		f->name = part->name;
		f->b = blk;
		f->pos = 0;
		return 0;
	}
	seterr(EAdmin, "can't read %s: %r", part->name);
	freezblock(blk);
	return -1;
}
/*
 * return the next non-blank input line,
 * stripped of leading white space and with # comments eliminated.
 *
 * The line is terminated in place inside f's buffer (the newline and
 * any '#' are overwritten with NUL), so the result is valid only
 * while the buffer is.  Returns nil when no newline-terminated line
 * remains.
 */
char*
ifileline(IFile *f)
{
	char *line, *nl, *hash;

	for(;;){
		line = (char*)f->b->data + f->pos;
		nl = memchr(line, '\n', f->b->len - f->pos);
		if(nl == nil)
			return nil;
		*nl = '\0';
		f->pos = (nl + 1) - (char*)f->b->data;
		hash = strchr(line, '#');
		if(hash != nil)
			*hash = '\0';
		while(*line == ' ' || *line == '\t' || *line == '\r')
			line++;
		if(*line != '\0')
			return line;
	}
}
/*
 * Read the next line of f as a name and copy it into dst with namecp.
 * Returns -1 if there is no line or it is ANameSize characters or
 * longer, 0 otherwise.
 */
int
ifilename(IFile *f, char *dst)
{
	char *line;

	line = ifileline(f);
	if(line == nil)
		return -1;
	if(strlen(line) >= ANameSize)
		return -1;
	namecp(dst, line);
	return 0;
}
/*
 * Read the next line of f and convert it with stru32int, storing
 * the value in *r.  Returns -1 if there is no line; otherwise
 * returns whatever stru32int returns.
 */
int
ifileu32int(IFile *f, u32int *r)
{
	char *line;

	if((line = ifileline(f)) == nil)
		return -1;
	return stru32int(line, r);
}

819
src/cmd/venti/srv/index.c Normal file
View File

@ -0,0 +1,819 @@
/*
* Index, mapping scores to log positions.
*
* The index is made up of some number of index sections, each of
* which is typically stored on a different disk. The blocks in all the
* index sections are logically numbered, with each index section
* responsible for a range of blocks. Blocks are typically 8kB.
*
* The N index blocks are treated as a giant hash table. The top 32 bits
* of score are used as the key for a lookup. Each index block holds
* one hash bucket, which is responsible for ceil(2^32 / N) of the key space.
*
* The index is sized so that a particular bucket is extraordinarily
* unlikely to overflow: assuming compressed data blocks are 4kB
* on disk, and assuming each block has a 40 byte index entry,
* the index data will be 1% of the total data. Since scores are essentially
* random, all buckets should be about the same fullness.
* A factor of 5 gives us a wide comfort boundary to account for
* random variation. So the index disk space should be 5% of the arena disk space.
*/
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/* the commented-out prototypes below are for functions that are either non-static in this file or not defined here */
//static int bucklook(u8int *score, int type, u8int *data, int n);
//static int writebucket(ISect *is, u32int buck, IBucket *ib, DBlock *b);
//static int okibucket(IBucket *ib, ISect *is);
static int initindex1(Index*);
static ISect *initisect1(ISect *is);
//static int splitiblock(Index *ix, DBlock *b, ISect *is, u32int buck, IBucket *ib);
/* top d bits of the 32-bit key k; d==0 is special-cased to 0 so k is never shifted by 32 */
#define KEY(k,d) ((d) ? (k)>>(32-(d)) : 0)
//static QLock indexlock; //ZZZ
/* first line of the index configuration table: written by outputindex, checked by parseindex */
static char IndexMagic[] = "venti index configuration";
/*
 * Load an existing index.
 *
 * name  - expected index name
 * sects - the n index sections, in order; on success the index keeps
 *         this array (ix->sects = sects)
 * n     - number of sections, must be > 0
 *
 * The configuration table is read from the first section and checked
 * against every section.  Returns the index, or nil with the error
 * string set.
 *
 * Every failure after the Index is allocated releases it with
 * freeindex.  Once ix->sects has been set, freeindex also frees the
 * sections and the sects array itself.
 */
Index*
initindex(char *name, ISect **sects, int n)
{
	IFile f;
	Index *ix;
	ISect *is;
	u32int last, blocksize, tabsize;
	int i;

	if(n <= 0){
		fprint(2, "bad n\n");
		seterr(EOk, "no index sections to initialize index");
		return nil;
	}
	ix = MKZ(Index);
	if(ix == nil){
		fprint(2, "no mem\n");
		seterr(EOk, "can't initialize index: out of memory");
		return nil;
	}

	tabsize = sects[0]->tabsize;
	if(partifile(&f, sects[0]->part, sects[0]->tabbase, tabsize) < 0){
		/* ix->sects is still nil, so this frees only the Index */
		freeindex(ix);
		return nil;
	}
	if(parseindex(&f, ix) < 0){
		freeifile(&f);
		freeindex(ix);
		return nil;
	}
	freeifile(&f);
	if(namecmp(ix->name, name) != 0){
		/* report before freeing: the message reads ix->name */
		seterr(ECorrupt, "mismatched index name: found %s expected %s", ix->name, name);
		freeindex(ix);
		return nil;
	}
	if(ix->nsects != n){
		/* "found" is what the table on disk says, "expected" is what the caller supplied */
		seterr(ECorrupt, "mismatched number index sections: found %d expected %d", ix->nsects, n);
		freeindex(ix);
		return nil;
	}
	ix->sects = sects;
	last = 0;
	blocksize = ix->blocksize;
	for(i = 0; i < ix->nsects; i++){
		is = sects[i];
		/* each section must agree with the table and start where the previous one stopped */
		if(namecmp(ix->name, is->index) != 0
		|| is->blocksize != blocksize
		|| is->tabsize != tabsize
		|| namecmp(is->name, ix->smap[i].name) != 0
		|| is->start != ix->smap[i].start
		|| is->stop != ix->smap[i].stop
		|| last != is->start
		|| is->start > is->stop){
			seterr(ECorrupt, "inconsistent index sections in %s", ix->name);
			freeindex(ix);
			return nil;
		}
		last = is->stop;
	}
	ix->tabsize = tabsize;
	ix->buckets = last;

	if(initindex1(ix) < 0){
		freeindex(ix);
		return nil;
	}

	ix->arenas = MKNZ(Arena*, ix->narenas);
	if(maparenas(ix->amap, ix->arenas, ix->narenas, ix->name) < 0){
		freeindex(ix);
		return nil;
	}
	return ix;
}
/*
 * Compute ix->div, the number of 32-bit keys handled by each bucket,
 * as ceil(2^32 / ix->buckets), and check that dividing the key space
 * by it gives back exactly ix->buckets buckets.
 *
 * Returns 0 on success, -1 with the error string set if the index
 * has no buckets (which would otherwise divide by zero) or if the
 * bucket count is inconsistent with the divisor.
 */
static int
initindex1(Index *ix)
{
	u32int buckets;

	if(ix->buckets == 0){
		seterr(ECorrupt, "no buckets in index %s", ix->name);
		return -1;
	}
	ix->div = (((u64int)1 << 32) + ix->buckets - 1) / ix->buckets;
	buckets = (((u64int)1 << 32) - 1) / ix->div + 1;
	if(buckets != ix->buckets){
		seterr(ECorrupt, "inconsistent math for divisor and buckets in %s", ix->name);
		return -1;
	}
	return 0;
}
/*
 * Write the index configuration table to every index section and
 * then write back each section's header.
 * Returns 0 on success, -1 with the error string set on failure.
 */
int
wbindex(Index *ix)
{
	Fmt fmt;
	ZBlock *tab;
	ISect *sect;
	int i, ok;

	if(ix->nsects == 0){
		seterr(EOk, "no sections in index %s", ix->name);
		return -1;
	}
	tab = alloczblock(ix->tabsize, 1, ix->blocksize);
	if(tab == nil){
		seterr(EOk, "can't write index configuration: out of memory");
		return -1;
	}

	/* format the table once, then copy it to every section */
	ok = 0;
	fmtzbinit(&fmt, tab);
	if(outputindex(&fmt, ix) < 0){
		seterr(EOk, "can't make index configuration: table storage too small %d", ix->tabsize);
		ok = -1;
	}else{
		for(i = 0; i < ix->nsects; i++){
			sect = ix->sects[i];
			if(writepart(sect->part, sect->tabbase, tab->data, ix->tabsize) < 0){
				seterr(EOk, "can't write index: %r");
				ok = -1;
				break;
			}
		}
	}
	freezblock(tab);
	if(ok < 0)
		return -1;

	for(i = 0; i < ix->nsects; i++){
		if(wbisect(ix->sects[i]) < 0)
			return -1;
	}
	return 0;
}
/*
 * Format the index configuration table:
 *
 *	IndexMagic '\n' version '\n' name '\n' blocksize '\n' sections arenas
 *
 * version, blocksize: u32int
 * name: max. ANameSize string
 * sections, arenas: AMap
 *
 * Returns 0 on success, -1 if any part fails to format.
 */
int
outputindex(Fmt *f, Index *ix)
{
	if(fmtprint(f, "%s\n%ud\n%s\n%ud\n", IndexMagic, ix->version, ix->name, ix->blocksize) < 0)
		return -1;
	if(outputamap(f, ix->smap, ix->nsects) < 0)
		return -1;
	if(outputamap(f, ix->amap, ix->narenas) < 0)
		return -1;
	return 0;
}
/*
 * Parse an index configuration table, the inverse of outputindex,
 * filling in ix's version, name, blocksize, section map and arena map.
 *
 * Returns 0 on success, -1 on failure.  Maps that were parsed before
 * the failure remain attached to ix.
 */
int
parseindex(IFile *f, Index *ix)
{
	AMapN maps;
	u32int num;
	char *magic;

	/* magic */
	magic = ifileline(f);
	if(magic == nil || strcmp(magic, IndexMagic) != 0){
		seterr(ECorrupt, "bad index magic for %s", f->name);
		return -1;
	}

	/* version */
	if(ifileu32int(f, &num) < 0){
		seterr(ECorrupt, "syntax error: bad version number in %s", f->name);
		return -1;
	}
	ix->version = num;
	if(ix->version != IndexVersion){
		seterr(ECorrupt, "bad version number in %s", f->name);
		return -1;
	}

	/* name */
	if(ifilename(f, ix->name) < 0){
		seterr(ECorrupt, "syntax error: bad index name in %s", f->name);
		return -1;
	}

	/* block size */
	if(ifileu32int(f, &num) < 0){
		seterr(ECorrupt, "syntax error: bad block size number in %s", f->name);
		return -1;
	}
	ix->blocksize = num;

	/* section map, then arena map */
	if(parseamap(f, &maps) < 0)
		return -1;
	ix->nsects = maps.n;
	ix->smap = maps.map;

	if(parseamap(f, &maps) < 0)
		return -1;
	ix->narenas = maps.n;
	ix->amap = maps.map;

	return 0;
}
/*
 * initialize an entirely new index
 *
 * name  - name for the new index
 * sects - n unused index sections; on success the index keeps this
 *         array, and each section is assigned its bucket range and
 *         the index name
 * n     - number of sections, must be >= 1
 *
 * Returns the new index, or nil with the error string set.  On
 * failure the sects array and the sections are not freed.  A failure
 * in the final consistency check happens after the sections have
 * already been assigned their ranges.
 */
Index *
newindex(char *name, ISect **sects, int n)
{
	Index *ix;
	AMap *smap;
	u64int nb;
	u32int div, ub, xb, fb, start, stop, blocksize, tabsize;
	int i, j;

	if(n < 1){
		seterr(EOk, "creating index with no index sections");
		return nil;
	}

	/*
	 * compute the total buckets available in the index,
	 * and the total buckets which are used.
	 */
	nb = 0;
	blocksize = sects[0]->blocksize;
	tabsize = sects[0]->tabsize;
	for(i = 0; i < n; i++){
		if(sects[i]->start != 0 || sects[i]->stop != 0
		|| sects[i]->index[0] != '\0'){
			seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
			return nil;
		}
		if(blocksize != sects[i]->blocksize){
			seterr(EOk, "mismatched block sizes in index sections");
			return nil;
		}
		if(tabsize != sects[i]->tabsize){
			seterr(EOk, "mismatched config table sizes in index sections");
			return nil;
		}
		nb += sects[i]->blocks;
	}

	/*
	 * check for duplicate names
	 */
	for(i = 0; i < n; i++){
		for(j = i + 1; j < n; j++){
			if(namecmp(sects[i]->name, sects[j]->name) == 0){
				seterr(EOk, "duplicate section name %s for index %s", sects[i]->name, name);
				return nil;
			}
		}
	}

	/* nb is used as a divisor below */
	if(nb == 0){
		seterr(EOk, "creating index with no blocks in its sections");
		return nil;
	}
	if(nb >= ((u64int)1 << 32)){
		seterr(EBug, "index too large");
		return nil;
	}

	fb = 0;
	/* div = keys per bucket, ub = buckets actually used, xb = buckets left over */
	div = (((u64int)1 << 32) + nb - 1) / nb;
	ub = (((u64int)1 << 32) - 1) / div + 1;
	if(div < 100){
		seterr(EBug, "index divisor too coarse");
		return nil;
	}
	if(ub > nb){
		seterr(EBug, "index initialization math wrong");
		return nil;
	}
	xb = nb - ub;

	/*
	 * initialize each of the index sections
	 * and the section map table
	 */
	smap = MKNZ(AMap, n);
	if(smap == nil){
		seterr(EOk, "can't create new index: out of memory");
		return nil;
	}
	start = 0;
	for(i = 0; i < n; i++){
		/* spread the unused buckets evenly; the last section absorbs the remainder */
		stop = start + sects[i]->blocks - xb / n;
		if(i == n - 1)
			stop = ub;
		sects[i]->start = start;
		sects[i]->stop = stop;
		namecp(sects[i]->index, name);

		smap[i].start = start;
		smap[i].stop = stop;
		namecp(smap[i].name, sects[i]->name);
		start = stop;
	}

	/*
	 * initialize the index itself
	 */
	ix = MKZ(Index);
	if(ix == nil){
		seterr(EOk, "can't create new index: out of memory");
		free(smap);
		return nil;
	}
	ix->version = IndexVersion;
	namecp(ix->name, name);
	ix->sects = sects;
	ix->smap = smap;
	ix->nsects = n;
	ix->blocksize = blocksize;
	ix->buckets = ub;
	ix->tabsize = tabsize;
	ix->div = div;
	ix->bitblocks = fb;

	if(initindex1(ix) < 0){
		/* free only what was allocated here; freeindex would also free the caller's sections */
		free(smap);
		free(ix);
		return nil;
	}

	return ix;
}
/*
 * Load an existing index section from a partition: read and unpack
 * its header, check the version, and derive the computed fields
 * with initisect1.
 *
 * Returns the section, or nil with the error string set.
 */
ISect*
initisect(Part *part)
{
	ISect *is;
	ZBlock *b;
	int ok;

	b = alloczblock(HeadSize, 0, 0);
	if(b == nil || readpart(part, PartBlank, b->data, HeadSize) < 0){
		/* set the error first so %r still sees the read failure, then release the header block */
		seterr(EAdmin, "can't read index section header: %r");
		if(b != nil)
			freezblock(b);
		return nil;
	}

	is = MKZ(ISect);
	if(is == nil){
		freezblock(b);
		return nil;
	}
	is->part = part;
	ok = unpackisect(is, b->data);
	freezblock(b);
	if(ok < 0){
		seterr(ECorrupt, "corrupted index section header: %r");
		freeisect(is);
		return nil;
	}

	if(is->version != ISectVersion1 && is->version != ISectVersion2){
		seterr(EAdmin, "unknown index section version %d", is->version);
		freeisect(is);
		return nil;
	}

	return initisect1(is);
}
/*
 * Create the in-memory description of a brand new, empty index
 * section on part.
 *
 * vers      - section format version; ISectVersion2 sections get a
 *             random nonzero bucket magic, others get 0
 * name      - section name
 * blocksize - bucket size in bytes
 * tabsize   - space to reserve for the index configuration table
 *
 * The configuration table starts at the first block boundary after
 * the header; the buckets start at the first block boundary after
 * the table.  Returns nil if allocation or initisect1 fails.
 */
ISect*
newisect(Part *part, u32int vers, char *name, u32int blocksize, u32int tabsize)
{
	ISect *sect;
	u32int tabbase;

	sect = MKZ(ISect);
	if(sect == nil)
		return nil;

	namecp(sect->name, name);
	sect->version = vers;
	sect->part = part;
	sect->blocksize = blocksize;
	sect->start = 0;
	sect->stop = 0;

	tabbase = (PartBlank + HeadSize + blocksize - 1) & ~(blocksize - 1);
	sect->blockbase = (tabbase + tabsize + blocksize - 1) & ~(blocksize - 1);
	sect->blocks = sect->part->size / blocksize - sect->blockbase / blocksize;

	sect->bucketmagic = 0;
	if(sect->version == ISectVersion2){
		/* draw until nonzero */
		while((sect->bucketmagic = fastrand()) == 0)
			;
	}

	return initisect1(sect);
}
/*
 * initialize the computed parameters for an index section
 * (buckmax, blocklog, tabbase, tabsize) and sanity check its layout.
 *
 * Returns is on success.  On a fatal inconsistency the section is
 * freed and nil is returned with the error string set.
 */
static ISect*
initisect1(ISect *is)
{
	u64int usable;

	is->buckmax = (is->blocksize - IBucketSize) / IEntrySize;
	is->blocklog = u64log2(is->blocksize);
	if(is->blocksize != (1 << is->blocklog)){
		seterr(ECorrupt, "illegal non-power-of-2 bucket size %d\n", is->blocksize);
		goto Bad;
	}
	partblocksize(is->part, is->blocksize);

	is->tabbase = (PartBlank + HeadSize + is->blocksize - 1) & ~(is->blocksize - 1);
	if(is->tabbase >= is->blockbase){
		seterr(ECorrupt, "index section config table overlaps bucket storage");
		goto Bad;
	}
	is->tabsize = is->blockbase - is->tabbase;

	/*
	 * ZZZ a block count that does not match the partition size is
	 * reported through the error string but is not treated as fatal.
	 */
	usable = is->part->size & ~(u64int)(is->blocksize - 1);
	if(is->blockbase + (u64int)is->blocks * is->blocksize != usable)
		seterr(ECorrupt, "invalid blocks in index section %s", is->name);

	if(is->stop - is->start > is->blocks){
		seterr(ECorrupt, "index section overflows available space");
		goto Bad;
	}
	if(is->start > is->stop){
		seterr(ECorrupt, "invalid index section range");
		goto Bad;
	}
	return is;

Bad:
	freeisect(is);
	return nil;
}
/*
 * Write an index section's header back to its partition.
 * Returns 0 on success and -1 on failure.  Packing and write failures
 * set the error string; an allocation failure does not (ZZZ).
 */
int
wbisect(ISect *is)
{
	ZBlock *hdr;
	int ok;

	hdr = alloczblock(HeadSize, 1, 0);
	if(hdr == nil)
		return -1;

	ok = 0;
	if(packisect(is, hdr->data) < 0){
		seterr(ECorrupt, "can't make index section header: %r");
		ok = -1;
	}else if(writepart(is->part, PartBlank, hdr->data, HeadSize) < 0){
		seterr(EAdmin, "can't write index section header: %r");
		ok = -1;
	}
	freezblock(hdr);
	return ok;
}
/*
 * Free an index section structure; nil is ignored.
 * Only the ISect itself is released.
 */
void
freeisect(ISect *is)
{
	if(is != nil)
		free(is);
}
/*
 * Free an index and what hangs off it: the arena map, the arena
 * pointer array, every section in sects, the sects array itself,
 * and the section map.  nil is ignored.
 */
void
freeindex(Index *ix)
{
	int i;

	if(ix == nil)
		return;
	free(ix->amap);
	free(ix->arenas);
	if(ix->sects != nil){
		i = 0;
		while(i < ix->nsects)
			freeisect(ix->sects[i++]);
	}
	free(ix->sects);
	free(ix->smap);
	free(ix);
}
/*
 * write a clump to an available arena in the index
 * and return the address of the clump within the index.
 *
 * Arenas are tried in order starting at ix->mapalloc; the first one
 * that accepts the clump becomes the new ix->mapalloc.  Returns
 * TWID64 with the error string set if no arena accepts it.
 *
 * ZZZ question: should this distinguish between an arena
 * filling up and real errors writing the clump?
 */
u64int
writeiclump(Index *ix, Clump *c, u8int *clbuf, u64int *pa)
{
	u64int addr;
	int which;

	trace(TraceLump, "writeiclump enter");
	which = ix->mapalloc;
	while(which < ix->narenas){
		addr = writeaclump(ix->arenas[which], c, clbuf, ix->amap[which].start, pa);
		if(addr != TWID64){
			ix->mapalloc = which;	/* assuming write is atomic, race is okay */
			trace(TraceLump, "writeiclump exit");
			return addr;
		}
		which++;
	}

	seterr(EAdmin, "no space left in arenas");
	trace(TraceLump, "writeiclump failed");
	return TWID64;
}
/*
 * convert an index address to an arena and an address relative to
 * the start of that arena's range.
 *
 * a  - index address
 * aa - out: a minus the start of the selected arena's map entry
 *
 * Returns the arena, or nil (leaving *aa untouched) if the address
 * lies beyond the selected map entry or that arena is not loaded.
 */
Arena*
amapitoa(Index *ix, u64int a, u64int *aa)
{
	int i, lo, hi, mid;

	/* binary search for the last map entry whose start is <= a */
	lo = 1;
	hi = ix->narenas - 1;
	while(lo <= hi){
		mid = (hi + lo) / 2;
		if(a < ix->amap[mid].start)
			hi = mid - 1;
		else
			lo = mid + 1;
	}
	lo--;

	if(a > ix->amap[lo].stop){
		for(i=0; i<ix->narenas; i++)
			print("arena %d: %llux - %llux\n", i, ix->amap[i].start, ix->amap[i].stop);
		print("want arena %d for %llux\n", lo, a);
		seterr(ECrash, "unmapped address passed to amapitoa");
		return nil;
	}
	if(ix->arenas[lo] == nil){
		seterr(ECrash, "unmapped arena selected in amapitoa");
		return nil;
	}

	*aa = a - ix->amap[lo].start;
	return ix->arenas[lo];
}
/*
 * Compare two index addresses field by field.
 * Returns 0 if they are identical and 1 otherwise; it does not
 * define an ordering.
 */
int
iaddrcmp(IAddr *ia1, IAddr *ia2)
{
	if(ia1->type != ia2->type)
		return 1;
	if(ia1->size != ia2->size)
		return 1;
	if(ia1->blocks != ia2->blocks)
		return 1;
	if(ia1->addr != ia2->addr)
		return 1;
	return 0;
}
/*
 * lookup the score in the partition
 *
 * nothing needs to be explicitly locked:
 * only static parts of ix are used, and
 * the bucket is locked by the DBlock lock.
 *
 * Returns 0 and fills in *ie if an entry for (score, type) is found
 * in the on-disk index.  Returns -1 if the bloom filter rules the
 * score out, the bucket cannot be loaded or is corrupt, or the
 * bucket has no such entry.
 *
 * Note that the bloom filter consulted is mainindex's, not ix's.
 */
int
loadientry(Index *ix, u8int *score, int type, IEntry *ie)
{
	ISect *is;
	DBlock *blk;
	IBucket ib;
	u32int buck;
	int where, ok;

	trace(TraceLump, "loadientry enter");

	if(!inbloomfilter(mainindex->bloom, score)){
		trace(TraceLump, "loadientry bloomhit");
		return -1;
	}

	trace(TraceLump, "loadientry loadibucket");
	blk = loadibucket(ix, score, &is, &buck, &ib);
	trace(TraceLump, "loadientry loadedibucket");
	if(blk == nil)
		return -1;

	ok = -1;
	if(okibucket(&ib, is) < 0){
		trace(TraceLump, "loadientry badbucket");
	}else{
		where = bucklook(score, type, ib.data, ib.n);
		if(where & 1){
			/* low bit flags a match; the rest is the entry's byte offset */
			trace(TraceLump, "loadientry found");
			unpackientry(ie, &ib.data[where ^ 1]);
			ok = 0;
		}else{
			/* the bloom filter said maybe, but the index has no entry */
			trace(TraceLump, "loadientry notfound");
			addstat(StatBloomFalseMiss, 1);
		}
	}

	putdblock(blk);
	trace(TraceLump, "loadientry exit");
	return ok;
}
/*
 * Check that a bucket's entry count does not exceed what fits in a
 * block of section is.  Returns 0 if it is plausible, -1 with the
 * error string set if not.
 */
int
okibucket(IBucket *ib, ISect *is)
{
	if(ib->n > is->buckmax){
		seterr(EICorrupt, "corrupted disk index bucket: n=%ud max=%ud, range=[%lud,%lud)",
			ib->n, is->buckmax, is->start, is->stop);
		return -1;
	}
	return 0;
}
/*
 * look for (score, type) within the n packed entries in data, which
 * are sorted by score and then by on-disk type;
 * return 1 | byte offset of the matching entry,
 * or 0 | byte offset of the least entry > the key
 * (n*IEntrySize if there is none).
 */
int
bucklook(u8int *score, int otype, u8int *data, int n)
{
	int i, lo, hi, mid, off, cmp, type, have;

	type = vttodisktype(otype);
	lo = 0;
	hi = n - 1;
	while(lo <= hi){
		mid = (hi + lo) >> 1;
		off = mid * IEntrySize;

		/* compare scores bytewise, then break ties on the type byte */
		cmp = 0;
		for(i = 0; i < VtScoreSize && cmp == 0; i++)
			cmp = (int)score[i] - (int)data[off + i];
		if(cmp == 0){
			have = data[off + IEntryTypeOff];
			cmp = (type > have) - (type < have);
		}

		if(cmp == 0)
			return off | 1;
		if(cmp > 0)
			lo = mid + 1;
		else
			hi = mid - 1;
	}
	return lo * IEntrySize;
}
/*
 * compare two packed IEntries; consistent with bucklook:
 * order by score bytes first, then by the type byte.
 * Returns -1, 0, or 1, so it can be used as a qsort-style comparison.
 */
int
ientrycmp(const void *vie1, const void *vie2)
{
	u8int *a, *b;
	int i;

	a = (u8int*)vie1;
	b = (u8int*)vie2;
	for(i = 0; i < VtScoreSize; i++)
		if(a[i] != b[i])
			return a[i] < b[i] ? -1 : 1;
	if(a[IEntryTypeOff] != b[IEntryTypeOff])
		return a[IEntryTypeOff] < b[IEntryTypeOff] ? -1 : 1;
	return 0;
}
/*
 * find the number of the index section holding bucket #buck:
 * the last section whose start is <= buck (section 0 if none of
 * the later ones qualifies).
 */
int
indexsect0(Index *ix, u32int buck)
{
	int lo, hi, mid;

	lo = 1;
	hi = ix->nsects - 1;
	while(lo <= hi){
		mid = (hi + lo) >> 1;
		if(buck < ix->sects[mid]->start)
			hi = mid - 1;
		else
			lo = mid + 1;
	}
	return lo - 1;
}
/*
 * load the index block at bucket #buck
 *
 * pis   - if non-nil, receives the section holding the bucket
 * pbuck - if non-nil, receives the bucket number relative to the
 *         start of that section
 * ib    - if non-nil, receives the unpacked bucket header
 * mode  - access mode passed to getdblock
 *
 * Returns the disk block (the caller releases it with putdblock),
 * or nil if buck is outside its section's range or the block
 * cannot be fetched.
 */
static DBlock*
loadibucket0(Index *ix, u32int buck, ISect **pis, u32int *pbuck, IBucket *ib, int mode)
{
	ISect *sect;
	DBlock *blk;
	u32int rel;

	sect = ix->sects[indexsect0(ix, buck)];
	if(buck < sect->start || buck >= sect->stop){
		seterr(EAdmin, "index lookup out of range: %ud not found in index\n", buck);
		return nil;
	}

	rel = buck - sect->start;
	blk = getdblock(sect->part, sect->blockbase + ((u64int)rel << sect->blocklog), mode);
	if(blk == nil)
		return nil;

	if(pis != nil)
		*pis = sect;
	if(pbuck != nil)
		*pbuck = rel;
	if(ib != nil)
		unpackibucket(ib, blk->data, sect->bucketmagic);
	return blk;
}
/*
 * find the number of the index section holding score.
 * the bucket number is the score's 32-bit prefix divided by ix->div.
 */
static int
indexsect1(Index *ix, u8int *score)
{
return indexsect0(ix, hashbits(score, 32) / ix->div);
}
/*
 * load the index block responsible for score.
 * the bucket number is the score's 32-bit prefix divided by ix->div;
 * the block is requested with mode OREAD.
 * pis, pbuck and ib are passed through to loadibucket0.
 */
static DBlock*
loadibucket1(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
{
return loadibucket0(ix, hashbits(score, 32)/ix->div, pis, pbuck, ib, OREAD);
}
/*
 * exported entry point: number of the index section holding score.
 * simply forwards to indexsect1.
 */
int
indexsect(Index *ix, u8int *score)
{
return indexsect1(ix, score);
}
/*
 * exported entry point: load the index block responsible for score.
 * simply forwards to loadibucket1; returns nil on failure.
 */
DBlock*
loadibucket(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
{
return loadibucket1(ix, score, pis, pbuck, ib);
}

249
src/cmd/venti/srv/lump.c Normal file
View File

@ -0,0 +1,249 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/* when nonzero, writelump hands new lumps to queuewrite instead of writing them itself */
int queuewrites = 0;
/* 1: writelump discards data right after computing the score; 2: discards it after checking the lump cache */
int writestodevnull = 0;
/* forward declaration: defined at the end of this file */
static Packet *readilump(Lump *u, IAddr *ia, u8int *score, int rac);
/*
 * Read the block with the given score and type.
 *
 * size   - the largest block the caller will accept
 * cached - if non-nil, set to 1 when the data came from the lump
 *          cache and to 0 otherwise
 *
 * Returns a packet holding the data (an empty packet for the zero
 * score), or nil with the error string set if the block does not
 * exist, is larger than size, or cannot be read.
 */
Packet*
readlump(u8int *score, int type, u32int size, int *cached)
{
	Lump *lump;
	Packet *pkt;
	IAddr ia;
	u32int have;
	int rac;

	trace(TraceLump, "readlump enter");

	if(scorecmp(score, zeroscore) == 0)
		return packetalloc();

	lump = lookuplump(score, type);
	if(lump->data != nil){
		/* lump cache hit: hand back a duplicate of the cached packet */
		trace(TraceLump, "readlump lookuplump hit");
		if(cached)
			*cached = 1;
		have = packetsize(lump->data);
		if(have <= size)
			pkt = packetdup(lump->data, 0, have);
		else{
			seterr(EOk, "read too small: asked for %d need at least %d", size, have);
			pkt = nil;
		}
		putlump(lump);
		return pkt;
	}

	if(cached)
		*cached = 0;

	if(lookupscore(score, type, &ia, &rac) < 0){
		//ZZZ place to check for someone trying to guess scores
		seterr(EOk, "no block with score %V/%d exists", score, type);
	}else if(ia.size > size){
		seterr(EOk, "read too small 1: asked for %d need at least %d", size, ia.size);
	}else{
		trace(TraceLump, "readlump readilump");
		pkt = readilump(lump, &ia, score, rac);
		putlump(lump);
		trace(TraceLump, "readlump exit");
		return pkt;
	}
	putlump(lump);
	return nil;
}
/*
 * save away a lump, and return its score.
 * doesn't store duplicates, but checks that the data is really the same.
 *
 * p       - the data; this function takes ownership of it on every path
 * score   - out: SHA1 score of p
 * type    - block type
 * creator, ms - passed through to the write path (ms is the start
 *           time used for the RPC timing statistics)
 *
 * Returns 0 on success, -1 with the error string set on failure.
 * Every path that obtains the lump with lookuplump either releases
 * it with putlump or hands it to queuewrite.
 */
int
writelump(Packet *p, u8int *score, int type, u32int creator, uint ms)
{
	Lump *u;
	int ok;

	packetsha1(p, score);
	if(packetsize(p) == 0 || writestodevnull==1){
		packetfree(p);
		return 0;
	}

	u = lookuplump(score, type);
	if(u->data != nil){
		/* already cached: nothing to store, but the contents must agree */
		ok = 0;
		if(packetcmp(p, u->data) != 0){
			seterr(EStrange, "score collision");
			ok = -1;
		}
		packetfree(p);
		putlump(u);
		return ok;
	}

	if(writestodevnull==2){
		packetfree(p);
		/* release the lump here too; this path used to return with it still held */
		putlump(u);
		return 0;
	}

	if(queuewrites)
		return queuewrite(u, p, creator, ms);

	ok = writeqlump(u, p, creator, ms);

	putlump(u);
	return ok;
}
/*
 * Store the data for lump u.
 *
 * u       - the lump, held by the caller; supplies score and type
 * p       - the data; this function takes ownership of it
 * creator - passed to storeclump
 * ms      - start time, used for the RPC timing statistics
 *
 * If the index already has an entry for the score, the data is
 * assumed to be present and is not rewritten or compared (XXX).
 * An earlier version read the stored block back to detect score
 * collisions and rewrote it if the read failed; that code sat after
 * an unconditional return, could never run, and has been removed.
 *
 * Otherwise the clump is stored, its index entry is inserted as
 * dirty, and the packet is attached to the lump.
 *
 * Returns 0 on success, nonzero on failure.
 */
int
writeqlump(Lump *u, Packet *p, int creator, uint ms)
{
	ZBlock *flat;
	IAddr ia;
	int ok;
	int rac;

	if(lookupscore(u->score, u->type, &ia, &rac) == 0){
		/* assume the data is here! XXX */
		packetfree(p);
		ms = msec() - ms;
		addstat2(StatRpcWriteOld, 1, StatRpcWriteOldTime, ms);
		return 0;
	}

	flat = packet2zblock(p, packetsize(p));
	ok = storeclump(mainindex, flat, u->score, u->type, creator, &ia);
	freezblock(flat);
	if(ok == 0)
		ok = insertscore(u->score, &ia, 1);
	if(ok == 0)
		insertlump(u, p);	/* the lump cache now owns p */
	else
		packetfree(p);

	ms = msec() - ms;
	addstat2(StatRpcWriteNew, 1, StatRpcWriteNewTime, ms);
	return ok;
}
/*
 * Walk up to n clump headers starting at arena offset aa (index
 * address a) and enter each one into the index cache via insertscore.
 * Stops early at the end of the used part of the arena or at the first
 * header that cannot be read or unpacked.
 */
static void
lreadahead(u64int a, Arena *arena, u64int aa, int n)
{
	u8int hdr[ClumpSize];
	Clump c;
	IAddr ia;

	for(; n > 0; n--){
		if(aa >= arena->memstats.used)
			return;
		if(readarena(arena, aa, hdr, ClumpSize) < ClumpSize)
			return;
		if(unpackclump(&c, hdr, arena->clumpmagic) < 0)
			return;

		ia.addr = a;
		ia.type = c.info.type;
		ia.size = c.info.uncsize;
		ia.blocks = (c.info.size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
		insertscore(c.info.score, &ia, 0);

		/* step both addresses past this clump's header and data */
		a += ClumpSize + c.info.size;
		aa += ClumpSize + c.info.size;
	}
}
/*
 * Read the clump described by index address ia from its arena, check
 * it against the index entry and the expected score, and return its
 * contents as a packet.  A duplicate of the packet is inserted into
 * the lump cache for u.  When rac is 0, the following clump headers
 * are also read ahead into the index cache.  Returns nil on failure.
 */
static Packet*
readilump(Lump *u, IAddr *ia, u8int *score, int rac)
{
	Arena *arena;
	ZBlock *zb;
	Packet *p, *pp;
	Clump cl;
	u64int next, aa;
	u8int sc[VtScoreSize];

	trace(TraceLump, "readilump enter");
	arena = amapitoa(mainindex, ia->addr, &aa);
	if(arena == nil){
		trace(TraceLump, "readilump amapitoa failed");
		return nil;
	}

	trace(TraceLump, "readilump loadclump");
	zb = loadclump(arena, aa, ia->blocks, &cl, sc, paranoid);
	if(zb == nil){
		trace(TraceLump, "readilump loadclump failed");
		return nil;
	}

	/* the clump on disk must agree with what the index said about it */
	if(ia->size != cl.info.uncsize){
		seterr(EInconsist, "index and clump size mismatch");
		goto Bad;
	}
	if(ia->type != cl.info.type){
		seterr(EInconsist, "index and clump type mismatch");
		goto Bad;
	}
	if(scorecmp(score, sc) != 0){
		seterr(ECrash, "score mismatch");
		goto Bad;
	}

	if(rac == 0){
		trace(TraceLump, "readilump readahead");
		next = ia->addr + ClumpSize + cl.info.size;
		aa += ClumpSize + cl.info.size;
		lreadahead(next, arena, aa, 20);
	}

	trace(TraceLump, "readilump success");
	p = zblock2packet(zb, cl.info.uncsize);
	freezblock(zb);

	/* the cache keeps its own copy; the caller owns p */
	pp = packetdup(p, 0, packetsize(p));
	trace(TraceLump, "readilump insertlump");
	insertlump(u, pp);
	trace(TraceLump, "readilump exit");
	return p;

Bad:
	freezblock(zb);
	return nil;
}

View File

@ -0,0 +1,417 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/*
 * CHECK(x) wraps consistency checks (calls to checklumpcache).
 * As defined below it expands to nothing; switch to the
 * commented-out definition to run the checks.
 */
/* #define CHECK(x) x */
#define CHECK(x)
typedef struct LumpCache LumpCache;
/* parameters of the score hash table (lumpcache.heads) */
enum
{
HashLog = 9,
HashSize = 1<<HashLog,
HashMask = HashSize - 1,
};
/*
 * The cache of lumps.  lock guards the structure; full is the
 * rendezvous on which lookuplump and insertlump sleep when nothing
 * can be evicted, and which putlump and bumplump wake.
 */
struct LumpCache
{
QLock lock;
Rendez full;
Lump *free; /* list of available lumps */
u32int allowed; /* total allowable space for packets */
u32int avail; /* remaining space for packets */
u32int now; /* ticks for usage timestamps */
Lump **heads; /* hash table for finding address */
int nheap; /* number of available victims */
Lump **heap; /* heap for locating victims */
int nblocks; /* number of blocks allocated */
Lump *blocks; /* array of block descriptors */
};
static LumpCache lumpcache;
/* heap maintenance and eviction helpers, defined below */
static void delheap(Lump *db);
static int downheap(int i, Lump *b);
static void fixheap(int i, Lump *b);
static int upheap(int i, Lump *b);
static Lump *bumplump(void);
/*
 * Set up the lump cache: size bytes of packet space shared among
 * nblocks lump descriptors.  Every descriptor starts out on the free
 * list, marked as holding no type (TWID8) and not in the heap (TWID32).
 */
void
initlumpcache(u32int size, u32int nblocks)
{
	Lump *b;
	u32int i;

	lumpcache.full.l = &lumpcache.lock;
	lumpcache.nblocks = nblocks;
	lumpcache.allowed = size;
	lumpcache.avail = size;
	lumpcache.heads = MKNZ(Lump*, HashSize);
	lumpcache.heap = MKNZ(Lump*, nblocks);
	lumpcache.blocks = MKNZ(Lump, nblocks);
	setstat(StatLcacheSize, lumpcache.nblocks);

	/* push each descriptor onto the front of the free list */
	lumpcache.free = nil;
	for(i = 0; i < nblocks; i++){
		b = &lumpcache.blocks[i];
		b->type = TWID8;
		b->heap = TWID32;
		b->next = lumpcache.free;
		lumpcache.free = b;
	}
	lumpcache.nheap = 0;
}
/*
 * Find or create the cache entry for (score, type) and return it
 * with its reference count incremented and its own lock held.
 * The caller must release it with putlump.  On a miss the returned
 * lump has data == nil and size == 0; the caller is expected to
 * fetch the data and attach it with insertlump.
 * Sleeps on lumpcache.full when no entry can be evicted.
 */
Lump*
lookuplump(u8int *score, int type)
{
uint ms;
Lump *b;
u32int h;
ms = msec();
trace(TraceLump, "lookuplump enter");
h = hashbits(score, HashLog);
/*
 * look for the block in the cache
 */
qlock(&lumpcache.lock);
CHECK(checklumpcache());
again:
for(b = lumpcache.heads[h]; b != nil; b = b->next){
if(scorecmp(score, b->score)==0 && type == b->type){
addstat(StatLcacheHit, 1);
trace(TraceLump, "lookuplump hit");
goto found;
}
}
trace(TraceLump, "lookuplump miss");
/*
 * missed: locate the block with the oldest second to last use.
 * remove it from the heap, and fix up the heap.
 */
while(lumpcache.free == nil){
trace(TraceLump, "lookuplump bump");
CHECK(checklumpcache());
if(bumplump() == nil){
CHECK(checklumpcache());
logerr(EAdmin, "all lump cache blocks in use");
addstat(StatLcacheStall, 1);
CHECK(checklumpcache());
/* rsleep releases lumpcache.lock while waiting, so the hash chain must be searched again */
rsleep(&lumpcache.full);
CHECK(checklumpcache());
addstat(StatLcacheStall, -1);
goto again;
}
CHECK(checklumpcache());
}
addstat(StatLcacheMiss, 1);
/* take the first descriptor off the free list */
b = lumpcache.free;
lumpcache.free = b->next;
/*
 * the new block has no last use, so assume it happens sometime in the middle
ZZZ this is not reasonable
 */
b->used = (b->used2 + lumpcache.now) / 2;
/*
 * rechain the block on the correct hash chain
 */
b->next = lumpcache.heads[h];
lumpcache.heads[h] = b;
if(b->next != nil)
b->next->prev = b;
b->prev = nil;
scorecp(b->score, score);
b->type = type;
b->size = 0;
b->data = nil;
found:
/* take a reference and shift the usage timestamps: used2 is the second-to-last use */
b->ref++;
b->used2 = b->used;
b->used = lumpcache.now++;
/* used2 is the heap key, so a lump already in the heap must be repositioned */
if(b->heap != TWID32)
fixheap(b->heap, b);
CHECK(checklumpcache());
qunlock(&lumpcache.lock);
/* the per-lump lock is taken only after the cache lock is dropped */
addstat(StatLumpStall, 1);
qlock(&b->lock);
addstat(StatLumpStall, -1);
trace(TraceLump, "lookuplump exit");
addstat2(StatLcacheRead, 1, StatLcacheReadTime, msec()-ms);
return b;
}
/*
 * Attach packet p as the cached data of lump b and charge its
 * allocated size (packetasize) against the cache's space budget.
 * Evicts other lumps with bumplump until there is room, sleeping on
 * lumpcache.full when nothing can be evicted.
 * NOTE(review): callers visible here pass a lump obtained from
 * lookuplump whose data is nil; any previous b->data is overwritten
 * without being freed.
 */
void
insertlump(Lump *b, Packet *p)
{
u32int size;
/*
 * take the cache lock; b itself is not looked up here
 */
trace(TraceLump, "insertlump enter");
qlock(&lumpcache.lock);
CHECK(checklumpcache());
again:
/* note: this counter is incremented again each time the loop below sleeps and retries */
addstat(StatLcacheWrite, 1);
/*
 * make room: evict the blocks with the oldest second to last use
 * until the packet fits in the available space.
 */
size = packetasize(p);
//ZZZ
while(lumpcache.avail < size){
trace(TraceLump, "insertlump bump");
CHECK(checklumpcache());
if(bumplump() == nil){
logerr(EAdmin, "all lump cache blocks in use");
addstat(StatLcacheStall, 1);
CHECK(checklumpcache());
/* rsleep releases lumpcache.lock while waiting */
rsleep(&lumpcache.full);
CHECK(checklumpcache());
addstat(StatLcacheStall, -1);
goto again;
}
CHECK(checklumpcache());
}
b->data = p;
b->size = size;
lumpcache.avail -= size;
CHECK(checklumpcache());
qunlock(&lumpcache.lock);
trace(TraceLump, "insertlump exit");
}
/*
 * Release a lump obtained from lookuplump: drop its lock and its
 * reference.  When the last reference goes away the lump becomes an
 * eviction candidate (it is added to the heap if not already there)
 * and anyone waiting for cache space is woken.  nil is ignored.
 */
void
putlump(Lump *b)
{
	int slot;

	if(b == nil)
		return;

	trace(TraceLump, "putlump");
	qunlock(&b->lock);

	qlock(&lumpcache.lock);
	CHECK(checklumpcache());
	b->ref--;
	if(b->ref == 0){
		if(b->heap == TWID32){
			slot = lumpcache.nheap++;
			upheap(slot, b);
		}
		trace(TraceLump, "putlump wakeup");
		rwakeupall(&lumpcache.full);
	}
	CHECK(checklumpcache());
	qunlock(&lumpcache.lock);
}
/*
 * remove some lump from use and update the free list and counters.
 * called with lumpcache.lock held (by lookuplump and insertlump).
 * returns the evicted lump, which is now at the head of the free
 * list, or nil if the heap held no unreferenced lump.
 */
static Lump*
bumplump(void)
{
Lump *b;
u32int h;
/*
 * remove blocks until we find one that is unused
 * referenced blocks are left in the heap even though
 * they can't be scavenged; this is simply a speed optimization
 */
CHECK(checklumpcache());
for(;;){
if(lumpcache.nheap == 0){
trace(TraceLump, "bumplump emptyheap");
return nil;
}
/* heap[0] is the root of the heap; referenced lumps popped here go back in via putlump */
b = lumpcache.heap[0];
delheap(b);
if(!b->ref){
trace(TraceLump, "bumplump wakeup");
rwakeupall(&lumpcache.full);
break;
}
}
/*
 * unchain the block
 */
trace(TraceLump, "bumplump unchain");
if(b->prev == nil){
/* no predecessor: b must be the head of its hash chain */
h = hashbits(b->score, HashLog);
if(lumpcache.heads[h] != b)
sysfatal("bad hash chains in lump cache");
lumpcache.heads[h] = b->next;
}else
b->prev->next = b->next;
if(b->next != nil)
b->next->prev = b->prev;
/* give the packet's space back to the cache budget */
if(b->data != nil){
packetfree(b->data);
b->data = nil;
lumpcache.avail += b->size;
b->size = 0;
}
/* TWID8 marks the descriptor as holding no lump; push it on the free list */
b->type = TWID8;
b->next = lumpcache.free;
lumpcache.free = b;
CHECK(checklumpcache());
trace(TraceLump, "bumplump exit");
return b;
}
/*
 * Delete an arbitrary block from the heap: the last heap element is
 * moved into the vacated slot and sifted into place, and db is marked
 * as no longer in the heap (TWID32).
 */
static void
delheap(Lump *db)
{
	Lump *last;

	lumpcache.nheap--;
	last = lumpcache.heap[lumpcache.nheap];
	fixheap(db->heap, last);
	db->heap = TWID32;
}
/*
 * Place b at heap slot i and move it to its correct position:
 * try sifting it up first, and only if it did not move sift it down.
 */
static void
fixheap(int i, Lump *b)
{
	int at;

	at = upheap(i, b);
	if(at != i)
		return;
	downheap(i, b);
}
/*
 * Sift b up from slot i toward the root and return the slot where it
 * ends up.  The key is used2 - now in unsigned 32-bit arithmetic:
 * keys are compared relative to the current tick rather than as
 * absolute values (presumably so the ordering survives wraparound of
 * the tick counter).
 */
static int
upheap(int i, Lump *b)
{
Lump *bb;
u32int now;
int p;
now = lumpcache.now;
for(; i != 0; i = p){
/* p is the parent slot of i */
p = (i - 1) >> 1;
bb = lumpcache.heap[p];
if(b->used2 - now >= bb->used2 - now)
break;
/* parent has the larger key: pull it down into slot i */
lumpcache.heap[i] = bb;
bb->heap = i;
}
lumpcache.heap[i] = b;
b->heap = i;
return i;
}
/*
 * Sift b down from slot i toward the leaves and return the slot where
 * it ends up.  Uses the same relative key (used2 - now, unsigned) as
 * upheap.
 */
static int
downheap(int i, Lump *b)
{
Lump *bb;
u32int now;
int k;
now = lumpcache.now;
for(; ; i = k){
/* k is the left child of i; k + 1 is the right child */
k = (i << 1) + 1;
if(k >= lumpcache.nheap)
break;
/* pick the child with the smaller key */
if(k + 1 < lumpcache.nheap && lumpcache.heap[k]->used2 - now > lumpcache.heap[k + 1]->used2 - now)
k++;
bb = lumpcache.heap[k];
if(b->used2 - now <= bb->used2 - now)
break;
/* child has the smaller key: pull it up into slot i */
lumpcache.heap[i] = bb;
bb->heap = i;
}
lumpcache.heap[i] = b;
b->heap = i;
return i;
}
/*
 * Consistency check: verify that bb is on the hash chain for its
 * score and that the prev links along the way are correct.
 * Calls sysfatal on any inconsistency.
 */
static void
findblock(Lump *bb)
{
	Lump *b, *prev;
	int h;

	h = hashbits(bb->score, HashLog);
	prev = nil;
	b = lumpcache.heads[h];
	while(b != nil){
		if(b->prev != prev)
			sysfatal("bad prev link");
		if(b == bb)
			return;
		prev = b;
		b = b->next;
	}
	sysfatal("block score=%V type=%#x missing from hash table", bb->score, bb->type);
}
/*
 * Exhaustive consistency check of the lump cache: heap back-pointers
 * and ordering, hash chain membership, space accounting, the free
 * list, and the total block count.  Dies on the first inconsistency.
 * Invoked through the CHECK macro, which must be enabled at the top of
 * the file for these checks to run.
 */
void
checklumpcache(void)
{
Lump *b;
u32int size, now, nfree;
int i, k, refed;
now = lumpcache.now;
/* every heap slot must point back at itself and obey the heap ordering against parent and children */
for(i = 0; i < lumpcache.nheap; i++){
if(lumpcache.heap[i]->heap != i)
sysfatal("lc: mis-heaped at %d: %d", i, lumpcache.heap[i]->heap);
if(i > 0 && lumpcache.heap[(i - 1) >> 1]->used2 - now > lumpcache.heap[i]->used2 - now)
sysfatal("lc: bad heap ordering");
k = (i << 1) + 1;
if(k < lumpcache.nheap && lumpcache.heap[i]->used2 - now > lumpcache.heap[k]->used2 - now)
sysfatal("lc: bad heap ordering");
k++;
if(k < lumpcache.nheap && lumpcache.heap[i]->used2 - now > lumpcache.heap[k]->used2 - now)
sysfatal("lc: bad heap ordering");
}
/* refed counts referenced blocks that are not in the heap; size totals the packet space in use */
refed = 0;
size = 0;
for(i = 0; i < lumpcache.nblocks; i++){
b = &lumpcache.blocks[i];
if(b->data == nil && b->size != 0)
sysfatal("bad size: %d data=%p", b->size, b->data);
if(b->ref && b->heap == TWID32)
refed++;
/* any block holding a lump (type != TWID8) must be findable on its hash chain */
if(b->type != TWID8){
findblock(b);
size += b->size;
}
if(b->heap != TWID32
&& lumpcache.heap[b->heap] != b)
sysfatal("lc: spurious heap value");
}
if(lumpcache.avail != lumpcache.allowed - size){
fprint(2, "mismatched available=%d and allowed=%d - used=%d space", lumpcache.avail, lumpcache.allowed, size);
/* deliberate nil dereference: crash here rather than exit via sysfatal */
*(int*)0=0;
}
/* free-list entries must hold no lump and must not be in the heap */
nfree = 0;
for(b = lumpcache.free; b != nil; b = b->next){
if(b->type != TWID8 || b->heap != TWID32)
sysfatal("lc: bad free list");
nfree++;
}
/* every block is in the heap, on the free list, or referenced outside the heap */
if(lumpcache.nheap + nfree + refed != lumpcache.nblocks)
sysfatal("lc: missing blocks: %d %d %d %d", lumpcache.nheap, refed, nfree, lumpcache.nblocks);
}

View File

@ -0,0 +1,187 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
typedef struct LumpQueue LumpQueue;
typedef struct WLump WLump;
enum
{
MaxLumpQ = 1 << 3 /* max. lumps on a single write queue, must be pow 2 */
};
/* one queued write: the arguments for writeqlump plus the flush generation at enqueue time */
struct WLump
{
Lump *u;
Packet *p;
int creator;
int gen;
uint ms;
};
/*
 * A ring buffer of pending writes, guarded by lock.
 * w is the next slot to fill and r the next slot to drain; the queue
 * is empty when w == r and full when advancing w would reach r.
 * full, empty, and flush are the rendezvous points that queuewrite,
 * queueproc, and flushqueue respectively sleep on.
 */
struct LumpQueue
{
QLock lock;
Rendez flush;
Rendez full;
Rendez empty;
WLump q[MaxLumpQ];
int w;
int r;
};
static LumpQueue *lumpqs; /* array of nqs queues, indexed by index section */
static int nqs;
static QLock glk; /* guards the increment of gen in flushqueue */
static int gen; /* flush generation counter */
static void queueproc(void *vq);
/*
 * Allocate nq write queues and start the procs that drain them.
 * Each queue gets NWriters queueproc procs.
 * Returns 0 on success, -1 if a proc could not be started.
 */
int
initlumpqueues(int nq)
{
	enum { NWriters = 5 };	/* writer procs per queue */
	LumpQueue *q;
	int i, j;

	nqs = nq;
	lumpqs = MKNZ(LumpQueue, nq);

	for(i = 0; i < nq; i++){
		q = &lumpqs[i];
		/* all three rendezvous points share the queue's lock */
		q->full.l = &q->lock;
		q->empty.l = &q->lock;
		q->flush.l = &q->lock;

		for(j = 0; j < NWriters; j++){
			if(vtproc(queueproc, q) < 0){
				seterr(EOk, "can't start write queue slave: %r");
				return -1;
			}
		}
	}
	return 0;
}
/*
 * Queue a lump and its packet data for writing.
 * The queue is chosen by the index section of the lump's score.
 * Blocks while that queue is full.  Returns 0 once the write has been
 * queued, -1 if the index section is out of range.
 */
int
queuewrite(Lump *u, Packet *p, int creator, uint ms)
{
	LumpQueue *q;
	WLump *slot;
	int sect;

	trace(TraceProc, "queuewrite");
	sect = indexsect(mainindex, u->score);
	if(sect < 0 || sect >= nqs){
		seterr(EBug, "internal error: illegal index section in queuewrite");
		return -1;
	}

	q = &lumpqs[sect];
	qlock(&q->lock);

	/* full when the slot after w is the one the readers drain next */
	while(((q->w + 1) & (MaxLumpQ - 1)) == q->r){
		trace(TraceProc, "queuewrite sleep");
		rsleep(&q->full);
	}

	slot = &q->q[q->w];
	slot->u = u;
	slot->p = p;
	slot->creator = creator;
	slot->ms = ms;
	slot->gen = gen;
	q->w = (q->w + 1) & (MaxLumpQ - 1);

	trace(TraceProc, "queuewrite wakeup");
	rwakeup(&q->empty);
	qunlock(&q->lock);
	return 0;
}
/*
 * Wait until every write that was queued before this call has been
 * taken off its queue.  Bumps the generation counter, then for each
 * index section's queue sleeps while the entry at the read position
 * carries an older generation.  Does nothing if the queues were never
 * initialized.
 */
void
flushqueue(void)
{
	LumpQueue *q;
	int i;

	if(!lumpqs)
		return;

	trace(TraceProc, "flushqueue");

	qlock(&glk);
	gen++;
	qunlock(&glk);

	for(i=0; i<mainindex->nsects; i++){
		q = &lumpqs[i];
		qlock(&q->lock);
		for(;;){
			if(q->w == q->r)
				break;
			if(gen - q->q[q->r].gen <= 0)
				break;
			trace(TraceProc, "flushqueue sleep q%d", i);
			rsleep(&q->flush);
		}
		qunlock(&q->lock);
	}
}
static void
queueproc(void *vq)
{
LumpQueue *q;
Lump *u;
Packet *p;
int creator;
uint ms;
threadsetname("queueproc");
q = vq;
for(;;){
qlock(&q->lock);
while(q->w == q->r){
trace(TraceProc, "queueproc sleep empty");
rsleep(&q->empty);
}
u = q->q[q->r].u;
p = q->q[q->r].p;
creator = q->q[q->r].creator;
ms = q->q[q->r].ms;
q->r = (q->r + 1) & (MaxLumpQ - 1);
trace(TraceProc, "queueproc wakeup flush");
rwakeupall(&q->flush);
trace(TraceProc, "queueproc wakeup full");
rwakeup(&q->full);
qunlock(&q->lock);
trace(TraceProc, "queueproc writelump %V", u->score);
if(writeqlump(u, p, creator, ms) < 0)
fprint(2, "failed to write lump for %V: %r", u->score);
trace(TraceProc, "queueproc wrotelump %V", u->score);
putlump(u);
}
}

146
src/cmd/venti/srv/mkfile Normal file
View File

@ -0,0 +1,146 @@
# Build rules for the venti server: a shared library of common code ($SLIB)
# plus one command per name in $TARG.
<$PLAN9/src/mkhdr
CC=9c
AR=ar
# objects archived into $SLIB and linked into every command
LIBOFILES=\
arena.$O\
arenas.$O\
bloom.$O\
buildbuck.$O\
clump.$O\
config.$O\
conv.$O\
dcache.$O\
dump.$O\
graph.$O\
httpd.$O\
icache.$O\
icachewrite.$O\
ifile.$O\
index.$O\
lump.$O\
lumpcache.$O\
lumpqueue.$O\
part.$O\
png.$O\
round.$O\
score.$O\
sortientry.$O\
stats.$O\
syncarena.$O\
syncindex0.$O\
trace.$O\
unwhack.$O\
utils.$O\
unittoull.$O\
whack.$O\
xml.$O\
zblock.$O\
zeropart.$O\
SLIB=libvs.a
LIB=$SLIB
HFILES= dat.h\
fns.h\
stdinc.h\
TARG=\
venti\
fmtarenas\
fmtbloom\
fmtisect\
fmtindex\
buildindex\
checkarenas\
checkindex\
clumpstats\
findscore\
rdarena\
wrarena\
syncindex\
printarena\
verifyarena\
OFILES=
# install the commands in a venti subdirectory of $BIN
BIN=$BIN/venti
# "mk it" builds only the venti server binary
it:V: $O.venti
$O.venti: # debugmalloc2.$O # debugmalloc.$O #_p9dir.$O debugmalloc.$O
CLEANFILES=$CLEANFILES $SLIB
<$PLAN9/src/mkmany
# archive the shared objects
$SLIB: $LIBOFILES
$AR rvc $SLIB $LIBOFILES
# xml.c:D: mkxml dat.h
# ./mkxml dat.h > xml.c
# "mk ainstall" copies every built command to the host amsterdam with scp
ainstall:V: ${TARG:%=%.ainstall}
%.ainstall:V: $O.%
scp $prereq amsterdam:/usr/local/bin/venti/$stem
# "mk test": format scratch arenas, bloom filter, and index under /home/tmp,
# start a venti server on them, archive /usr/local/plan9 with vac, kill the
# server, then sync (via the web interface if websync=yes, otherwise with
# syncindex) and run checkindex.
test:VQ: ${TARG:%=o.%}
slay o.venti|rc
vtmp=/home/tmp
echo '**********' FMTARENAS
./o.fmtarenas -a 40M -b 8k arenas $vtmp/arena
echo '**********' FMTBLOOM
./o.fmtbloom -s 10M $vtmp/bloom
echo '**********' FMTISECT
./o.fmtisect -b 8k isect $vtmp/isect
(
echo index main
echo isect $vtmp/isect
echo arenas $vtmp/arena
echo bloom $vtmp/bloom
echo webroot $HOME/src/venti/www
echo mem 64M
echo icmem 64M
echo bcmem 64M
) >vtmp.conf
echo '**********' FMTINDEX
./o.fmtindex vtmp.conf
echo '**********' VENTI
# ./o.venti -c vtmp.conf -B 64M -I 64M -C 64M -a 'tcp!*!17034' -h 'tcp!*!8001' >a 2>&1 &
./o.venti -c vtmp.conf -a 'tcp!*!17034' -h 'tcp!*!8001' >a 2>&1 &
sleep 5
echo '**********' VAC
venti='tcp!127.0.0.1!17034' export venti
9 time vac /usr/local/plan9 >a.vac
case ${websync:-no} in
yes)
echo '**********' SYNC VIA WEB
hget http://127.0.0.1:8001/flushdcache
hget http://127.0.0.1:8001/flushicache
hget http://127.0.0.1:8001/flushdcache
echo '**********' KILL VENTI
killall -9 o.venti
;;
no)
echo '**********' KILL VENTI
killall -9 o.venti
echo '**********' SYNCINDEX
./o.syncindex -B64M -I64M -f vtmp.conf
;;
esac
echo '**********' CHECKINDEX
./o.checkindex -B64M vtmp.conf /home/tmp/check >check.out
wc check.out
# Lua disk bindings, compiled and linked directly with gcc against Lua 5.0
luadisk.o: luadisk.c
gcc -c -ggdb -Wall -I/usr/include/lua50 luadisk.c
libluadisk.so: luadisk.o
gcc -shared -o $target luadisk.o -llua50 -llualib50
# xwrarena is linked from its own object only, without $SLIB
$O.xwrarena: xwrarena.$O
$LD -o $target xwrarena.$O

383
src/cmd/venti/srv/part.c Normal file
View File

@ -0,0 +1,383 @@
#ifdef PLAN9PORT /* SORRY! */
#include <u.h>
#include <sys/types.h>
#include <sys/vfs.h>
#endif
#include "stdinc.h"
#include <ctype.h>
#include "dat.h"
#include "fns.h"
/* largest block size passed to partblocksize so far */
u32int maxblocksize;
/* when nonzero, initpart is meant to open partitions for reading only */
int readonly;
/*
 * Parse an unsigned number at p in radix rad (as strtoull), followed
 * by an optional size suffix: k/K, m/M, g/G, or t/T, scaling by
 * successive powers of 1024.  Stores the value in *u and the address
 * of the first unconsumed character in *pp.
 * Returns 0 on success, -1 if p does not start with a digit
 * (in which case *pp and *u are left untouched).
 */
static int
strtoullsuf(char *p, char **pp, int rad, u64int *u)
{
	u64int v;

	/* isdigit is only defined for unsigned char values and EOF */
	if(!isdigit((unsigned char)*p))
		return -1;
	v = strtoull(p, &p, rad);
	switch(*p){
	case 'k':
	case 'K':
		v *= 1024;
		p++;
		break;
	case 'm':
	case 'M':
		v *= 1024*1024;
		p++;
		break;
	case 'g':
	case 'G':
		v *= 1024*1024*1024;
		p++;
		break;
	case 't':
	case 'T':
		/* two steps: 1024^4 does not fit in an int constant expression */
		v *= 1024*1024;
		v *= 1024*1024;
		p++;
		break;
	}
	*pp = p;
	*u = v;
	return 0;
}
/*
 * Split a partition name of the form file, file:lo, file:lo-hi, or
 * file:-hi.  *file receives a freshly allocated copy of the file part;
 * *lo and *hi receive the bounds, with 0 standing for "not given".
 * Numbers may carry the size suffixes accepted by strtoullsuf.
 * Returns 0 on success.  On a malformed range returns -1 after
 * freeing the copy (so *file must not be used).
 */
static int
parsepart(char *name, char **file, u64int *lo, u64int *hi)
{
	char *s;

	*file = estrdup(name);
	s = strrchr(*file, ':');
	if(s == nil){
		*lo = 0;
		*hi = 0;
		return 0;
	}

	/* cut the range off the file name */
	*s = 0;
	s++;

	if(*s == '-')
		*lo = 0;
	else if(strtoullsuf(s, &s, 0, lo) < 0)
		goto Bad;

	if(*s == '-')
		s++;
	if(*s == 0){
		*hi = 0;
		return 0;
	}
	if(strtoullsuf(s, &s, 0, hi) < 0)
		goto Bad;
	if(*s != 0)
		goto Bad;
	return 0;

Bad:
	free(*file);
	return -1;
}
/*
 * Open the partition called name (see parsepart for the syntax) with
 * the given open mode and return a descriptor for it.
 * If the global readonly is set, the access mode is forced to OREAD
 * while the other mode flags are kept.  If a read/write open fails, a
 * read-only open is tried and a warning is printed.
 * Returns nil, with the error string set, if the name cannot be
 * parsed, the file cannot be opened or stat'ed, its length is zero,
 * or the requested bounds fall outside the file.
 */
Part*
initpart(char *name, int mode)
{
	Part *part;
	Dir *dir;
	char *file;
	u64int lo, hi;

	if(parsepart(name, &file, &lo, &hi) < 0)
		return nil;
	trace(TraceDisk, "initpart %s file %s lo 0x%llx hi 0x%llx", name, file, lo, hi);
	part = MKZ(Part);
	part->name = estrdup(name);
	part->filename = estrdup(file);
	if(readonly){
		/* clear the access bits (not everything else) before forcing read-only */
		mode &= ~(OREAD|OWRITE|ORDWR);
		mode |= OREAD;
	}
	part->fd = open(file, mode);
	if(part->fd < 0){
		/* a read/write open may fail on read-only media: retry read-only */
		if((mode&(OREAD|OWRITE|ORDWR)) == ORDWR)
			part->fd = open(file, (mode&~ORDWR)|OREAD);
		if(part->fd < 0){
			freepart(part);
			fprint(2, "can't open partition='%s': %r\n", file);
			seterr(EOk, "can't open partition='%s': %r", file);
			fprint(2, "%r\n");
			free(file);
			return nil;
		}
		fprint(2, "warning: %s opened for reading only\n", name);
	}
	part->offset = lo;
	dir = dirfstat(part->fd);
	if(dir == nil){
		freepart(part);
		seterr(EOk, "can't stat partition='%s': %r", file);
		free(file);
		return nil;
	}
	if(dir->length == 0){
		free(dir);
		freepart(part);
		seterr(EOk, "can't determine size of partition %s", file);
		free(file);
		return nil;
	}
	if(dir->length < hi || dir->length < lo){
		freepart(part);
		seterr(EOk, "partition '%s': bounds out of range (max %lld)", name, dir->length);
		free(dir);
		free(file);
		return nil;
	}
	/* hi == 0 means no upper bound was given: run to the end of the file */
	if(hi == 0)
		hi = dir->length;
	part->size = hi - part->offset;
#ifdef _LIBC_H_
	{
		struct statfs sfs;
		if(fstatfs(part->fd, &sfs) >= 0)
			part->fsblocksize = sfs.f_bsize;
	}
#endif
	free(dir);
	/* part holds its own copies of the names; the parsed one is no longer needed */
	free(file);
	return part;
}
/*
 * Close a partition and free its descriptor, including the name and
 * filename strings allocated by initpart.  nil is ignored.
 */
void
freepart(Part *part)
{
	if(part == nil)
		return;
	if(part->fd >= 0)
		close(part->fd);
	free(part->name);
	free(part->filename);
	free(part);
}
/*
 * Record the block size of a partition.  The size may be set only
 * once per partition; a second call is fatal.  Also keeps the global
 * maxblocksize at the largest size seen.
 */
void
partblocksize(Part *part, u32int blocksize)
{
	if(part->blocksize != 0)
		sysfatal("resetting partition=%s's block size", part->name);

	part->blocksize = blocksize;
	if(maxblocksize < blocksize)
		maxblocksize = blocksize;
}
/*
* Read/write some amount of data between a block device or file and a memory buffer.
*
* Most Unix systems require that when accessing a block device directly,
* the buffer, offset, and count are all multiples of the device block size,
* making this a lot more complicated than it otherwise would be.
*
* Most of our callers will make things easy on us, but for some callers it's best
* if we just do the work here, with only one place to get it right (hopefully).
*
* If everything is aligned properly, prwb will try to do big transfers in the main
* body of the loop: up to MaxIo bytes at a time. If everything isn't aligned properly,
* we work one block at a time.
*/
#undef min
#define min(a, b) ((a) < (b) ? (a) : (b))
/*
 * Read (isread != 0) or write count bytes between vbuf and file
 * descriptor fd at byte offset offset, issuing only I/O requests whose
 * offset, length, and buffer address are multiples of blocksize.
 * Unaligned head and tail pieces go through an aligned bounce buffer
 * using read-modify-write.  name is used only in error messages.
 * Returns the original count on success (0 if count is 0) and -1 on
 * failure; I/O failures also set the error string.
 */
int
prwb(char *name, int fd, int isread, u64int offset, void *vbuf, u32int count, u32int blocksize)
{
	char *op;
	u8int *buf, *tmp, *freetmp, *dst;
	u32int c, delta, icount, opsize;
	int r;

	buf = vbuf;
	tmp = nil;
	freetmp = nil;
	dst = nil;
	r = 0;
	icount = count;
	opsize = blocksize;

	if(count == 0){
		logerr(EStrange, "prwb %s called to %s 0 bytes", name, isread ? "read" : "write");
		return 0;
	}

	assert(blocksize > 0);

	/* allocate blocksize-aligned temp buffer if needed */
	if(offset%blocksize || (ulong)buf%blocksize || count%blocksize){
		if((freetmp = malloc(blocksize*2)) == nil)
			return -1;
		tmp = freetmp;
		tmp += blocksize - (ulong)tmp%blocksize;
	}

	/* handle beginning fringe */
	if((delta = offset%blocksize) != 0){
		assert(tmp != nil);
		if((r=pread(fd, tmp, blocksize, offset-delta)) != blocksize){
			dst = tmp;
			offset = offset-delta;
			op = "read";
			goto Error;
		}
		c = min(count, blocksize-delta);
		assert(c > 0 && c < blocksize);
		if(isread)
			memmove(buf, tmp+delta, c);
		else{
			memmove(tmp+delta, buf, c);
			if((r=pwrite(fd, tmp, blocksize, offset-delta)) != blocksize){
				dst = tmp;
				offset = offset-delta;
				op = "write";
				goto Error;
			}
		}
		assert(c > 0);
		offset += c;
		buf += c;
		count -= c;
	}

	/* handle full blocks */
	while(count >= blocksize){
		assert(offset%blocksize == 0);
		if((ulong)buf%blocksize){
			/* unaligned memory: bounce one block at a time through tmp */
			assert(tmp != nil);
			dst = tmp;
			opsize = blocksize;
		}else{
			/* aligned memory: transfer as many whole blocks as allowed in one request */
			dst = buf;
			opsize = count - count%blocksize;
			if(opsize > MaxIo)
				opsize = MaxIo;
		}
		if(isread){
			if((r=pread(fd, dst, opsize, offset))<=0 || r%blocksize){
				op = "read";
				goto Error;
			}
			if(dst == tmp){
				assert(r == blocksize);
				memmove(buf, tmp, blocksize);
			}
		}else{
			if(dst == tmp){
				assert(opsize == blocksize);
				memmove(dst, buf, blocksize);
			}
			if((r=pwrite(fd, dst, opsize, offset))<=0 || r%blocksize){
				op = "write";
				goto Error;
			}
			if(dst == tmp)
				assert(r == blocksize);
		}
		assert(r > 0);
		offset += r;
		buf += r;
		count -= r;
	}

	/* handle ending fringe */
	if(count > 0){
		assert(offset%blocksize == 0);
		assert(tmp != nil);
		/* the fringe always moves exactly one block through tmp */
		opsize = blocksize;
		/*
		 * Complicated condition: if we're reading it's okay to get less than
		 * a block as long as it's enough to satisfy the read - maybe this is
		 * a normal file.  (We never write to normal files, or else things would
		 * be even more complicated.)
		 * r < 0 is tested on its own: compared against the unsigned count,
		 * -1 would convert to a huge value and look like success.
		 */
		r = pread(fd, tmp, blocksize, offset);
		if(r < 0 || (isread && r < count) || (!isread && r != blocksize)){
			print("FAILED isread=%d r=%d count=%d blocksize=%d\n", isread, r, count, blocksize);
			dst = tmp;
			op = "read";
			goto Error;
		}
		if(isread)
			memmove(buf, tmp, count);
		else{
			memmove(tmp, buf, count);
			/* write back one block only: tmp holds no more than that */
			if((r=pwrite(fd, tmp, blocksize, offset)) != blocksize){
				dst = tmp;
				op = "write";
				goto Error;
			}
		}
	}
	if(freetmp)
		free(freetmp);
	return icount;

Error:
	seterr(EAdmin, "%s %s offset 0x%llux count %ud buf %p returned %d: %r",
		op, name, offset, opsize, dst, r);
	if(freetmp)
		free(freetmp);
	return -1;
}
/*
 * Read or write count bytes at offset (relative to the start of the
 * partition) after checking that the range lies inside the partition.
 * The I/O block size is the file system's block size if known,
 * otherwise the partition's block size, otherwise 4096.
 * Returns what prwb returns, or -1 for an out-of-bounds request.
 */
int
rwpart(Part *part, int isread, u64int offset, u8int *buf, u32int count)
{
	u32int bs;

	trace(TraceDisk, "%s %s %ud at 0x%llx",
		isread ? "read" : "write", part->name, count, offset);

	if(offset >= part->size || offset+count > part->size){
		seterr(EStrange, "out of bounds %s offset 0x%llux count %ud to partition %s size 0x%llux",
			isread ? "read" : "write", offset, count, part->name, part->size);
		return -1;
	}

	bs = part->fsblocksize;
	if(bs == 0){
		bs = part->blocksize;
		if(bs == 0)
			bs = 4096;
	}
	return prwb(part->filename, part->fd, isread, part->offset+offset, buf, count, bs);
}
/*
 * Read count bytes at partition-relative offset into buf.
 * Returns the result of rwpart.
 */
int
readpart(Part *part, u64int offset, u8int *buf, u32int count)
{
return rwpart(part, 1, offset, buf, count);
}
/*
 * Write count bytes from buf at partition-relative offset.
 * Returns the result of rwpart.
 */
int
writepart(Part *part, u64int offset, u8int *buf, u32int count)
{
return rwpart(part, 0, offset, buf, count);
}
/*
 * Read an entire file (or partition range, in initpart syntax) into a
 * newly allocated ZBlock.  Returns nil, with the error string set, if
 * the file cannot be opened, the block cannot be allocated, or the
 * read fails.
 */
ZBlock*
readfile(char *name)
{
	Part *part;
	ZBlock *zb;

	part = initpart(name, OREAD);
	if(part == nil)
		return nil;

	zb = alloczblock(part->size, 0, part->blocksize);
	if(zb == nil){
		seterr(EOk, "can't alloc %s: %r", name);
		freepart(part);
		return nil;
	}

	if(readpart(part, 0, zb->data, part->size) >= 0){
		freepart(part);
		return zb;
	}

	seterr(EOk, "can't read %s: %r", name);
	freepart(part);
	freezblock(zb);
	return nil;
}

241
src/cmd/venti/srv/png.c Normal file
View File

@ -0,0 +1,241 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
enum
{
IDATSIZE = 20000, /* size of the buffer collected before an IDAT chunk is written */
FilterNone = 0 /* filter byte emitted at the start of each scan line */
};
typedef struct ZlibR ZlibR;
typedef struct ZlibW ZlibW;
/* read side of the compressor: a cursor over the source image (see zread) */
struct ZlibR
{
uchar *data; /* start of the pixel data */
int width; /* bytes per image row */
int dx; /* pixels per row */
int dy; /* number of rows */
int x; /* next pixel to emit in the current row */
int y; /* current row */
int pixwid; /* bytes per pixel */
};
/* write side of the compressor: buffers output into IDAT chunks (see zwrite) */
struct ZlibW
{
Hio *io; /* where chunks are written */
uchar *buf; /* start of the buffer */
uchar *b; /* next free byte in the buffer */
uchar *e; /* end of the buffer */
};
static ulong *crctab; /* CRC table, built on first use in writepng */
static uchar PNGmagic[] = { 137, 'P', 'N', 'G', '\r', '\n', 26, '\n'};
/*
 * Store the low 32 bits of v at a[0..3], most significant byte first.
 */
static void
put4(uchar *a, ulong v)
{
	int i;

	for(i = 3; i >= 0; i--){
		a[i] = v;
		v >>= 8;
	}
}
/*
 * Write one PNG chunk to io: 4-byte big-endian length, the 4-character
 * type, n bytes of data from d, and the CRC computed over the type and
 * the data.  Does nothing if type is not exactly four characters long.
 */
static void
chunk(Hio *io, char *type, uchar *d, int n)
{
	uchar word[4];
	ulong sum;

	if(strlen(type) != 4)
		return;

	put4(word, n);
	hwrite(io, word, 4);
	hwrite(io, type, 4);
	hwrite(io, d, n);

	sum = 0;
	sum = blockcrc(crctab, sum, type, 4);
	sum = blockcrc(crctab, sum, d, n);
	put4(word, sum);
	hwrite(io, word, 4);
}
/*
 * Read callback handed to deflatezlib: fill buf with up to n bytes of
 * image data taken from the ZlibR cursor va.  Each scan line is
 * preceded by a FilterNone byte.  Returns the number of bytes stored,
 * which is 0 once all dy rows have been delivered.
 */
static int
zread(void *va, void *buf, int n)
{
int a, i, pixels, pixwid;
uchar *b, *e, *img;
ZlibR *z;
z = va;
pixwid = z->pixwid;
b = buf;
e = b+n;
/* keep going while at least one more pixel fits in the output */
while(b+pixwid <= e){
if(z->y >= z->dy)
break;
/* start of a row: emit the filter type byte */
if(z->x == 0)
*b++ = FilterNone;
/* copy as many pixels as fit, but never past the end of the row */
pixels = (e-b)/pixwid;
if(pixels > z->dx - z->x)
pixels = z->dx - z->x;
img = z->data + z->width*z->y + pixwid*z->x;
memmove(b, img, pixwid*pixels);
if(pixwid == 4){
/*
 * Convert to non-premultiplied alpha.
 */
for(i=0; i<pixels; i++, b+=4){
a = b[3];
if(a == 255 || a == 0)
;
else{
/* clamp each color to alpha before scaling so the result stays within 0..255 */
if(b[0] >= a)
b[0] = a;
b[0] = (b[0]*255)/a;
if(b[1] >= a)
b[1] = a;
b[1] = (b[1]*255)/a;
if(b[2] >= a)
b[2] = a;
b[2] = (b[2]*255)/a;
}
}
}else
b += pixwid*pixels;
/* advance the cursor, wrapping to the next row at the end of a row */
z->x += pixels;
if(z->x >= z->dx){
z->x = 0;
z->y++;
}
}
return b - (uchar*)buf;
}
/*
 * Write the bytes buffered so far in z as one IDAT chunk and reset
 * the buffer to empty.
 */
static void
IDAT(ZlibW *z)
{
chunk(z->io, "IDAT", z->buf, z->b - z->buf);
z->b = z->buf;
}
/*
 * Write callback handed to deflatezlib: append n bytes from buf to the
 * ZlibW buffer va, emitting an IDAT chunk each time the buffer fills.
 * Always returns n.
 */
static int
zwrite(void *va, void *buf, int n)
{
	ZlibW *z;
	uchar *src;
	int left, room, m;

	z = va;
	src = buf;
	for(left = n; left > 0; left -= m){
		room = z->e - z->b;
		m = room < left ? room : left;
		memmove(z->b, src, m);
		z->b += m;
		src += m;
		if(z->b >= z->e)
			IDAT(z);
	}
	return n;
}
/*
 * Return an image with the same contents as i in channel format
 * ABGR32 (if i has an alpha channel) or BGR24 (if not).
 * If i is already in that format it is returned as is; otherwise a
 * new image is allocated and drawn into, and the caller must free it.
 * Returns nil if the allocation fails.
 */
static Memimage*
memRGBA(Memimage *i)
{
	Memimage *conv;
	char chan[32];
	ulong want;

	/*
	 * [A]BGR because we want R,G,B,[A] in big-endian order.  Sigh.
	 */
	chantostr(chan, i->chan);
	want = BGR24;
	if(strchr(chan, 'a'))
		want = ABGR32;

	if(i->chan == want)
		return i;

	qlock(&memdrawlock);
	conv = allocmemimage(i->r, want);
	if(conv)
		memimagedraw(conv, conv->r, i, i->r.min, nil, i->r.min, S);
	qunlock(&memdrawlock);
	return conv;
}
/*
 * Write image m to io as a PNG file: 8 bits per channel, RGB or RGBA
 * depending on whether m has an alpha channel, not interlaced.
 * Returns 0 on success, -1 if the image cannot be converted or the
 * compressor fails.
 */
int
writepng(Hio *io, Memimage *m)
{
	static int first = 1;
	static QLock lk;
	uchar buf[200], *h;
	Memimage *rgb;
	ZlibR zr;
	ZlibW zw;
	int ok;

	/* one-time setup of the deflate tables and the CRC table */
	if(first){
		qlock(&lk);
		if(first){
			deflateinit();
			crctab = mkcrctab(0xedb88320);
			first = 0;
		}
		qunlock(&lk);
	}

	rgb = memRGBA(m);
	if(rgb == nil)
		return -1;

	hwrite(io, PNGmagic, sizeof PNGmagic);

	/* IHDR chunk */
	h = buf;
	put4(h, Dx(m->r)); h += 4;
	put4(h, Dy(m->r)); h += 4;
	*h++ = 8;	/* 8 bits per channel */
	if(rgb->chan == BGR24)
		*h++ = 2;		/* RGB */
	else
		*h++ = 6;		/* RGBA */
	*h++ = 0;	/* compression - deflate */
	*h++ = 0;	/* filter - none */
	*h++ = 0;	/* interlace - none */
	chunk(io, "IHDR", buf, h-buf);

	/* image data */
	zr.dx = Dx(m->r);
	zr.dy = Dy(m->r);
	zr.width = rgb->width * sizeof(ulong);
	zr.data = rgb->data->bdata;
	zr.x = 0;
	zr.y = 0;
	zr.pixwid = chantodepth(rgb->chan)/8;
	zw.io = io;
	zw.buf = vtmalloc(IDATSIZE);
	zw.b = zw.buf;
	zw.e = zw.b + IDATSIZE;

	ok = 0;
	if(deflatezlib(&zw, zwrite, &zr, zread, 6, 0) < 0)
		ok = -1;
	else if(zw.b > zw.buf)
		IDAT(&zw);	/* flush the last partial buffer */
	free(zw.buf);

	if(ok == 0)
		chunk(io, "IEND", nil, 0);

	/* memRGBA allocated a converted copy unless m was already in the right format */
	if(m != rgb){
		qlock(&memdrawlock);
		freememimage(rgb);
		qunlock(&memdrawlock);
	}
	return ok;
}

View File

@ -0,0 +1,130 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/*
 * Print the command synopsis, including the -o option accepted by
 * threadmain, and exit with status "usage".
 */
void
usage(void)
{
	fprint(2, "usage: printarena [-o arenaoffset] arenafile [offset]\n");
	threadexitsall("usage");
}
/*
 * Print a summary of the arena to standard error, then walk its
 * clumps starting at offset (or at 0 if offset is ~0), printing one
 * line per clump to standard output: offset, score, type, and
 * uncompressed size.  Stops at the first free clump, at a bad magic
 * number, or at a clump that fails to load or verify, and finally
 * prints the offset reached.
 */
static void
rdarena(Arena *arena, u64int offset)
{
u64int a, aa, e;
u32int magic;
Clump cl;
uchar score[VtScoreSize];
ZBlock *lump;
printarena(2, arena);
a = arena->base;
e = arena->base + arena->size;
if(offset != ~(u64int)0) {
if(offset >= e-a)
sysfatal("bad offset %llud >= %llud\n",
offset, e-a);
aa = offset;
} else
aa = 0;
/*
 * NOTE(review): aa starts at 0 and is checked against e-a above, so it
 * looks arena-relative, yet the loop bound is the absolute end e -
 * confirm whether e-a was intended.
 */
for(; aa < e; aa += ClumpSize+cl.info.size) {
magic = clumpmagic(arena, aa);
if(magic == ClumpFreeMagic)
break;
if(magic != arena->clumpmagic) {
fprint(2, "illegal clump magic number %#8.8ux offset %llud\n",
magic, aa);
break;
}
lump = loadclump(arena, aa, 0, &cl, score, 0);
if(lump == nil) {
fprint(2, "clump %llud failed to read: %r\n", aa);
break;
}
/* clumps marked corrupt are listed but not verified */
if(cl.info.type != VtCorruptType) {
scoremem(score, lump->data, cl.info.uncsize);
if(scorecmp(cl.info.score, score) != 0) {
fprint(2, "clump %llud has mismatched score\n", aa);
break;
}
if(vttypevalid(cl.info.type) < 0) {
fprint(2, "clump %llud has bad type %d\n", aa, cl.info.type);
break;
}
}
print("%22llud %V %3d %5d\n", aa, score, cl.info.type, cl.info.uncsize);
freezblock(lump);
}
print("end offset %llud\n", aa);
}
/*
 * printarena [-o aoffset] arenafile [offset]
 * Open arenafile read-only, read and print the arena header found at
 * byte offset aoffset, then list the arena's clumps with rdarena,
 * starting at the optional clump offset.
 */
void
threadmain(int argc, char *argv[])
{
char *file;
Arena *arena;
u64int offset, aoffset;
Part *part;
Dir *d;
uchar buf[8192];
ArenaHead head;
readonly = 1; /* for part.c */
aoffset = 0;
ARGBEGIN{
case 'o':
aoffset = strtoull(EARGF(usage()), 0, 0);
break;
default:
usage();
break;
}ARGEND
/* ~0 tells rdarena that no starting offset was given */
offset = ~(u64int)0;
switch(argc) {
default:
/* usage does not return, so control never falls into case 2 from here */
usage();
case 2:
offset = strtoull(argv[1], 0, 0);
/* fall through */
case 1:
file = argv[0];
}
ventifmtinstall();
statsinit();
if((d = dirstat(file)) == nil)
sysfatal("can't stat file %s: %r", file);
part = initpart(file, OREAD|ODIRECT);
if(part == nil)
sysfatal("can't open file %s: %r", file);
if(readpart(part, aoffset, buf, sizeof buf) < 0)
sysfatal("can't read file %s: %r", file);
if(unpackarenahead(&head, buf) < 0)
sysfatal("corrupted arena header: %r");
print("# arena head version=%d name=%.*s blocksize=%d size=%lld clumpmagic=0x%.8ux\n",
head.version, ANameSize, head.name, head.blocksize,
head.size, head.clumpmagic);
/* the arena must fit inside the file */
if(aoffset+head.size > d->length)
sysfatal("arena is truncated: want %llud bytes have %llud\n",
head.size, d->length);
partblocksize(part, head.blocksize);
initdcache(8 * MaxDiskBlock);
arena = initarena(part, aoffset, head.size, head.blocksize);
if(arena == nil)
sysfatal("initarena: %r");
rdarena(arena, offset);
threadexitsall(0);
}

View File

@ -0,0 +1,113 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
#include <bio.h>
Biobuf bout;
/*
 * Print one index entry to bout: address, score, type, and size.
 */
static void
pie(IEntry *ie)
{
Bprint(&bout, "%22lld %V %3d %5d\n",
ie->ia.addr, ie->score, ie->ia.type, ie->ia.size);
}
/*
 * Print the command synopsis and exit with a failure status.
 */
void
usage(void)
{
	fprint(2, "usage: printarenas [-B blockcachesize] config [arenaname...]\n");
	threadexitsall("usage");
}
Config conf;
/*
 * Decide whether the item called name should be dumped: yes (1) if no
 * names were given on the command line (argc == 0) or if name matches
 * one of the argc strings in argv; otherwise no (0).
 */
int
shoulddump(char *name, int argc, char **argv)
{
	if(argc == 0)
		return 1;
	while(argc-- > 0)
		if(strcmp(name, *argv++) == 0)
			return 1;
	return 0;
}
enum
{
ClumpChunks = 32*1024,
};
/*
 * Print an index entry for every clump recorded in the arena's clump
 * directory.  a is the index address of the arena's first clump; it is
 * advanced past each clump's header and data as the directory is
 * walked.  The directory is read ClumpChunks entries at a time, and
 * the walk stops early if a directory read fails.
 */
void
dumparena(Arena *arena, u64int a)
{
	IEntry ie;
	ClumpInfo *ci, *cis;
	u32int clump;
	int i, n;

	cis = MKN(ClumpInfo, ClumpChunks);
	memset(&ie, 0, sizeof(IEntry));
	for(clump = 0; clump < arena->memstats.clumps; clump += n){
		/* read a full chunk, or whatever is left at the end */
		n = ClumpChunks;
		if(n > arena->memstats.clumps - clump)
			n = arena->memstats.clumps - clump;
		if(readclumpinfos(arena, clump, cis, n) != n){
			fprint(2, "arena directory read failed: %r\n");
			break;
		}

		for(i = 0; i < n; i++){
			ci = &cis[i];
			ie.ia.type = ci->type;
			ie.ia.size = ci->uncsize;
			ie.ia.addr = a;
			a += ci->size + ClumpSize;
			ie.ia.blocks = (ci->size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
			scorecp(ie.score, ci->score);
			pie(&ie);
		}
	}
	free(cis);
}
/*
 * printarenas [-B blockcachesize] config [arenaname...]
 * Print the index entries for every clump in the named arenas, or in
 * all arenas if none are named.
 */
void
threadmain(int argc, char *argv[])
{
	int i;
	Index *ix;
	u32int bcmem;

	bcmem = 0;
	ARGBEGIN{
	case 'B':
		/* EARGF calls usage if -B has no argument, instead of passing nil along */
		bcmem = unittoull(EARGF(usage()));
		break;
	default:
		usage();
		break;
	}ARGEND

	if(argc < 1)
		usage();

	ventifmtinstall();

	if(initventi(argv[0], &conf) < 0)
		sysfatal("can't init venti: %r");

	/* raise the block cache to the minimum computed from the index layout */
	if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
		bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
	if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
	initdcache(bcmem);

	Binit(&bout, 1, OWRITE);
	ix = mainindex;
	for(i=0; i<ix->narenas; i++)
		if(shoulddump(ix->arenas[i]->name, argc-1, argv+1))
			dumparena(ix->arenas[i], ix->amap[i].start);
	Bterm(&bout);
	threadexitsall(0);
}

View File

@ -0,0 +1,99 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
#include <bio.h>
Biobuf bout;
/*
 * Print one index entry to bout: address, score, type, and size.
 */
static void
pie(IEntry *ie)
{
Bprint(&bout, "%22lld %V %3d %5d\n",
ie->ia.addr, ie->score, ie->ia.type, ie->ia.size);
}
/*
 * Print the command synopsis and exit with a failure status.
 */
void
usage(void)
{
	fprint(2, "usage: printindex [-B blockcachesize] config [isectname...]\n");
	threadexitsall("usage");
}
Config conf;
/*
 * Decide whether the item called name should be dumped: yes (1) if no
 * names were given on the command line (argc == 0) or if name matches
 * one of the argc strings in argv; otherwise no (0).
 */
int
shoulddump(char *name, int argc, char **argv)
{
	if(argc == 0)
		return 1;
	while(argc-- > 0)
		if(strcmp(name, *argv++) == 0)
			return 1;
	return 0;
}
/*
 * Print every index entry stored in index section is.
 * Each block of the section is read directly from its partition and
 * unpacked as a bucket; a block that cannot be read is reported on
 * standard error and skipped.
 */
void
dumpisect(ISect *is)
{
	int j;
	uchar *buf;
	u32int i;
	u64int off;
	IBucket ib;
	IEntry ie;

	buf = emalloc(is->blocksize);
	for(i=0; i<is->blocks; i++){
		off = is->blockbase+(u64int)is->blocksize*i;
		if(readpart(is->part, off, buf, is->blocksize) < 0)
			fprint(2, "read %s at 0x%llux: %r\n", is->part->name, off);
		else{
			unpackibucket(&ib, buf, is->bucketmagic);
			for(j=0; j<ib.n; j++){
				unpackientry(&ie, &ib.data[j*IEntrySize]);
				pie(&ie);
			}
		}
	}
	/* the block buffer is private to this call */
	free(buf);
}
/*
 * printindex [-B blockcachesize] config [isectname...]
 * Print every index entry in the named index sections, or in all
 * sections if none are named.
 */
void
threadmain(int argc, char *argv[])
{
	int i;
	Index *ix;
	u32int bcmem;

	bcmem = 0;
	ARGBEGIN{
	case 'B':
		/* EARGF calls usage if -B has no argument, instead of passing nil along */
		bcmem = unittoull(EARGF(usage()));
		break;
	default:
		usage();
		break;
	}ARGEND

	if(argc < 1)
		usage();

	/*
	 * NOTE(review): pie prints scores with %V, but only the 'H' verb is
	 * installed here (the sibling tools call ventifmtinstall) - confirm
	 * that %V gets installed elsewhere, e.g. by initventi.
	 */
	fmtinstall('H', encodefmt);

	if(initventi(argv[0], &conf) < 0)
		sysfatal("can't init venti: %r");

	/* raise the block cache to the minimum computed from the index layout */
	if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
		bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
	if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
	initdcache(bcmem);

	ix = mainindex;
	Binit(&bout, 1, OWRITE);
	for(i=0; i<ix->nsects; i++)
		if(shoulddump(ix->sects[i]->name, argc-1, argv+1))
			dumpisect(ix->sects[i]);
	Bterm(&bout);
	threadexitsall(0);
}

View File

@ -0,0 +1,42 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/* print the command synopsis on standard error and exit with status "usage" */
void
usage(void)
{
	static char synopsis[] = "usage: printmap [-B blockcachesize] config\n";

	fprint(2, "%s", synopsis);
	threadexitsall("usage");
}
/* configuration handed to initventi by threadmain */
Config conf;
/*
 * entry point: parse the options, initialize venti read-only from
 * the single config argument and print the main index on fd 1.
 * the -B value is parsed but not otherwise used here.
 */
void
threadmain(int argc, char *argv[])
{
	u32int cachebytes;

	cachebytes = 0;
	ARGBEGIN{
	case 'B':
		cachebytes = unittoull(ARGF());
		break;
	default:
		usage();
		break;
	}ARGEND

	/* this command never repairs anything, so always run read-only */
	readonly = 1;

	if(argc != 1)
		usage();

	if(initventi(argv[0], &conf) < 0)
		sysfatal("can't init venti: %r");

	printindex(1, mainindex);
	threadexitsall(0);
}

View File

@ -0,0 +1,91 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/* incremented by each -v; when set, threadmain prints the arena partition */
static int verbose;
void
usage(void)
{
fprint(2, "usage: rdarena [-v] arenapart arena\n");
threadexitsall(0);
}
/*
 * copy an arena to standard output.
 * the copy starts one block before arena->base and ends one block
 * after arena->base + arena->size.  a failed read is reported but
 * the buffer is still written; a failed write is fatal.
 */
static void
rdarena(Arena *arena)
{
	ZBlock *zb;
	u64int addr, end;
	u32int chunk;

	fprint(2, "copying %s to standard output\n", arena->name);
	printarena(2, arena);

	/* transfer in MaxIoSize pieces, but never less than one arena block */
	chunk = MaxIoSize;
	if(chunk < arena->blocksize)
		chunk = arena->blocksize;
	zb = alloczblock(chunk, 0, arena->blocksize);

	end = arena->base + arena->size + arena->blocksize;
	addr = arena->base - arena->blocksize;
	while(addr + arena->blocksize <= end){
		/* near the end, fall back to single blocks so we do not run past it */
		if(addr + chunk > end)
			chunk = arena->blocksize;
		if(readpart(arena->part, addr, zb->data, chunk) < 0)
			fprint(2, "can't copy %s, read at %lld failed: %r\n", arena->name, addr);
		if(write(1, zb->data, chunk) != chunk)
			sysfatal("can't copy %s, write at %lld failed: %r", arena->name, addr);
		addr += chunk;
	}
	freezblock(zb);
}
/*
 * entry point: open the arena partition read-only, look up the
 * arena named on the command line and copy it to standard output.
 * exits through sysfatal if the partition cannot be opened or
 * initialized, or if no arena has the requested name.
 */
void
threadmain(int argc, char *argv[])
{
	ArenaPart *ap;
	Part *part;
	char *partfile, *wanted;
	int k;

	ventifmtinstall();
	statsinit();

	ARGBEGIN{
	case 'v':
		verbose++;
		break;
	default:
		usage();
		break;
	}ARGEND

	readonly = 1;

	if(argc != 2)
		usage();
	partfile = argv[0];
	wanted = argv[1];

	part = initpart(partfile, OREAD|ODIRECT);
	if(part == nil)
		sysfatal("can't open partition %s: %r", partfile);

	ap = initarenapart(part);
	if(ap == nil)
		sysfatal("can't initialize arena partition in %s: %r", partfile);

	if(verbose)
		printarenapart(2, ap);

	initdcache(8 * MaxDiskBlock);

	for(k = 0; k < ap->narenas; k++){
		if(strcmp(ap->arenas[k]->name, wanted) != 0)
			continue;
		rdarena(ap->arenas[k]);
		threadexitsall(0);
	}

	sysfatal("couldn't find arena %s\n", wanted);
}

102
src/cmd/venti/srv/round.c Normal file
View File

@ -0,0 +1,102 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/*
 * called by the worker when it has finished a round.
 * records the round as completed (last = current), wakes everyone
 * sleeping on finish, then sleeps on start until doanother is set.
 * on return a new round number has been taken from next and
 * doanother has been cleared.
 */
void
waitforkick(Round *r)
{
	int round;

	qlock(&r->lock);
	r->last = r->current;
	assert(r->current+1 == r->next);
	rwakeupall(&r->finish);
	for(;;){
		if(r->doanother)
			break;
		rsleep(&r->start);
	}
	round = r->next++;
	r->current = round;
	r->doanother = 0;
	qunlock(&r->lock);
}
/*
 * request another round; the caller holds r->lock.
 * if wait is set, keep requesting and sleeping on finish until
 * r->last has caught up with the value r->next had on entry.
 */
static void
_kickround(Round *r, int wait)
{
	int target;

	if(r->doanother == 0)
		trace(TraceProc, "kick %s", r->name);
	r->doanother = 1;
	rwakeup(&r->start);

	if(!wait)
		return;

	target = r->next;
	/* signed difference, so the comparison survives counter wraparound */
	for(;;){
		if((int)(target - r->last) <= 0)
			break;
		r->doanother = 1;
		rwakeup(&r->start);
		rsleep(&r->finish);
	}
}
/* locked wrapper around _kickround: request a round, optionally waiting for it */
void
kickround(Round *r, int wait)
{
	QLock *lk;

	lk = &r->lock;
	qlock(lk);
	_kickround(r, wait);
	qunlock(lk);
}
/*
 * reset r to its initial state: no round completed or in progress,
 * the next round numbered 1, no round requested.  all three
 * rendezvous points share r->lock.  name is stored, not copied.
 */
void
initround(Round *r, char *name, int delay)
{
	memset(r, 0, sizeof *r);

	r->name = name;
	r->delaytime = delay;

	/* round counters */
	r->last = 0;
	r->current = 0;
	r->next = 1;
	r->doanother = 0;

	/* every rendezvous is protected by the same lock */
	r->start.l = &r->lock;
	r->finish.l = &r->lock;
	r->delaywait.l = &r->lock;
}
/* set the delayed-kick flag and wake one sleeper on delaywait */
void
delaykickround(Round *r)
{
	QLock *lk;

	lk = &r->lock;
	qlock(lk);
	r->delaykick = 1;
	rwakeup(&r->delaywait);
	qunlock(lk);
}
/*
 * body of the delayed-kick proc for round v (a Round*); never returns.
 * waits until delaykick is set, sleeps r->delaytime with the lock
 * released, and then kicks a round (waiting for it to finish) unless
 * r->next changed during the sleep.
 */
void
delaykickroundproc(void *v)
{
Round *r = v;
int n;
threadsetname("delaykickproc %s", r->name);
qlock(&r->lock);
for(;;){
/* NOTE(review): nothing visible here clears r->delaykick, so once set this wait never blocks again - confirm intended */
while(r->delaykick == 0){
trace(TraceProc, "sleep");
rsleep(&r->delaywait);
}
/* remember the round counter so a round started during the sleep can be detected */
n = r->next;
qunlock(&r->lock);
trace(TraceProc, "waitround 0x%ux", (uint)n);
sleep(r->delaytime);
qlock(&r->lock);
/* only kick if nobody else started a round while we slept */
if(n == r->next){
trace(TraceProc, "kickround 0x%ux", (uint)n);
_kickround(r, 1);
}
trace(TraceProc, "finishround 0x%ux", (uint)n);
}
}

43
src/cmd/venti/srv/score.c Normal file
View File

@ -0,0 +1,43 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/* a score of VtScoreSize zero bytes (zero-initialized as a file-scope object) */
u8int zeroscore[VtScoreSize];
/*
 * store in score the sha1 digest of the n bytes at buf,
 * computed in one call from a freshly zeroed digest state.
 */
void
scoremem(u8int *score, u8int *buf, int n)
{
	DigestState ds;

	memset(&ds, 0, sizeof ds);
	sha1(buf, n, score, &ds);
}
/*
 * value of the hexadecimal digit c (either case),
 * or -1 if c is not a hexadecimal digit.
 */
static int
hexv(int c)
{
	switch(c){
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		return c - '0';
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
		return c - 'a' + 10;
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		return c - 'A' + 10;
	default:
		return -1;
	}
}
/*
 * parse 2*VtScoreSize hexadecimal digits from s into score.
 * returns -1 as soon as a non-hex character is met; otherwise
 * returns 1 if the string ends right after the digits and 0 if
 * more characters follow.
 * NOTE(review): callers testing only "< 0" will accept trailing
 * garbage - confirm that is intended.
 */
int
strscore(char *s, u8int *score)
{
	char *p;
	int i, hi, lo;

	p = s;
	for(i = 0; i < VtScoreSize; i++){
		hi = hexv(p[0]);
		if(hi < 0)
			return -1;
		lo = hexv(p[1]);
		if(lo < 0)
			return -1;
		score[i] = (hi << 4) + lo;
		p += 2;
	}
	return *p == '\0';
}

View File

@ -0,0 +1,376 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
#include <bio.h>
typedef struct IEBuck IEBuck;
typedef struct IEBucks IEBucks;
enum
{
/* number of ClumpInfo entries readarenainfo reads per batch */
ClumpChunks = 32*1024
};
/*
 * one sorting bucket: an in-memory chunk being filled plus the
 * chain of chunks already written to the temporary partition.
 */
struct IEBuck
{
u32int head; /* most recently written chunk of the on-disk chain; TWID32 if none */
u32int used; /* bytes of entry data in the current (last) chunk */
u64int total; /* total number of bytes flushed to disk for this bucket */
u8int *buf; /* chunk of entries for this bucket */
};
/*
 * state of the external bucket sort: the set of buckets, their
 * shared buffer, and the position in the temporary partition.
 */
struct IEBucks
{
Part *part;
u64int off; /* offset for writing data in the partition */
u32int chunks; /* total chunks written to fd */
u64int max; /* max bytes entered in any one bucket */
int bits; /* number of bits in initial bucket sort */
int nbucks; /* 1 << bits, the number of buckets */
u32int size; /* bytes in each of the buckets chunks */
u32int usable; /* amount usable for IEntry data */
u8int *buf; /* buffer for all chunks */
u8int *xbuf; /* raw allocation that buf is aligned within (see initiebucks) */
IEBuck *bucks;
};
/*
 * big-endian 32-bit access to a byte buffer.
 * U32GET widens the top byte to unsigned before shifting so a byte
 * >= 0x80 is never shifted into the sign bit of an int.
 * U32PUT is wrapped in do/while(0) so it acts as a single statement,
 * e.g. as the body of an unbraced if.  both evaluate p several times.
 */
#define U32GET(p)	(((unsigned int)(p)[0]<<24)|((p)[1]<<16)|((p)[2]<<8)|(p)[3])
#define U32PUT(p,v)	do{ (p)[0]=(v)>>24; (p)[1]=(v)>>16; (p)[2]=(v)>>8; (p)[3]=(v); }while(0)
static IEBucks *initiebucks(Part *part, int bits, u32int size);
static int flushiebuck(IEBucks *ib, int b, int reset);
static int flushiebucks(IEBucks *ib);
static u32int sortiebuck(IEBucks *ib, int b);
static u64int sortiebucks(IEBucks *ib);
static int sprayientry(IEBucks *ib, IEntry *ie);
static u32int readarenainfo(IEBucks *ib, Arena *arena, u64int a, Bloom *b);
static u32int readiebuck(IEBucks *ib, int b);
static void freeiebucks(IEBucks *ib);
/*
 * build, in partition tmp, a sorted file holding an IEntry for
 * every clump listed in the directories of ix's arenas.
 * the arena directories are assumed to be up to date.
 * each entry's score is also marked in bloom.
 * on success *base is the byte offset in tmp at which the sorted
 * data starts and the number of entries is returned.
 * returns TWID64 on any failure.
 */
u64int
sortrawientries(Index *ix, Part *tmp, u64int *base, Bloom *bloom)
{
	IEBucks *buckets;
	u64int nclumps, nsorted;
	u32int got;
	int k, failed;

	/* ZZZ should allow configuration of bits, bucket size */
	buckets = initiebucks(tmp, 8, 64*1024);
	if(buckets == nil){
		seterr(EOk, "can't create sorting buckets: %r");
		return TWID64;
	}

	failed = 0;
	nclumps = 0;
	fprint(2, "constructing entry list\n");
	k = 0;
	while(k < ix->narenas){
		got = readarenainfo(buckets, ix->arenas[k], ix->amap[k].start, bloom);
		if(got == TWID32){
			failed = 1;
			break;
		}
		nclumps += got;
		k++;
	}

	fprint(2, "sorting %lld entries\n", nclumps);
	if(!failed){
		nsorted = sortiebucks(buckets);
		*base = (u64int)buckets->chunks * buckets->size;
		/* a failed sort returns TWID64, which also lands here */
		if(nsorted != nclumps){
			fprint(2, "sorting messed up: clumps=%lld sorted=%lld\n", nclumps, nsorted);
			failed = 1;
		}
	}

	freeiebucks(buckets);
	if(failed)
		return TWID64;
	return nclumps;
}
/*
 * debugging aid: crash if the word four ulongs before cis is not 0xA110C09.
 * presumably this checks an allocator header magic - TODO confirm.
 */
#define CHECK(cis) if(((ulong*)cis)[-4] != 0xA110C09) xabort();
/*
 * stop the program immediately by storing through a nil pointer.
 */
void
xabort(void)
{
int *x;
x = 0;
*x = 0;
}
/*
 * read the whole clump directory of arena, ClumpChunks entries at a
 * time, turn each entry into an IEntry and spray it into the
 * buckets of ib.  a is the index address of the arena's first
 * clump and is advanced past each clump in turn.
 * entries of type VtCorruptType are counted but not sprayed;
 * every score, corrupt or not, is marked in the bloom filter b.
 * returns the number of entries sprayed, or TWID32 if the
 * directory could not be read.
 */
static u32int
readarenainfo(IEBucks *ib, Arena *arena, u64int a, Bloom *b)
{
	IEntry ie;
	ClumpInfo *ci, *cis;
	u32int clump;
	int n, failed, ncorrupt;

	/* ZZZ remove fprint? */
	if(arena->memstats.clumps)
		fprint(2, "\tarena %s: %d entries\n", arena->name, arena->memstats.clumps);
	else
		fprint(2, "[%s] ", arena->name);

	cis = MKN(ClumpInfo, ClumpChunks);
	memset(&ie, 0, sizeof(IEntry));
	failed = 0;
	ncorrupt = 0;
	clump = 0;
	while(clump < arena->memstats.clumps){
		/* size of this batch: a full chunk, or whatever is left */
		n = ClumpChunks;
		if(n > arena->memstats.clumps - clump)
			n = arena->memstats.clumps - clump;

		if(readclumpinfos(arena, clump, cis, n) != n){
			seterr(EOk, "arena directory read failed: %r");
			failed = 1;
			break;
		}

		for(ci = cis; ci < &cis[n]; ci++){
			ie.ia.type = ci->type;
			ie.ia.size = ci->uncsize;
			ie.ia.addr = a;
			a += ci->size + ClumpSize;
			/* clump length rounded up to whole address blocks */
			ie.ia.blocks = (ci->size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
			scorecp(ie.score, ci->score);
			if(ci->type != VtCorruptType)
				sprayientry(ib, &ie);
			else
				ncorrupt++;
			markbloomfilter(b, ie.score);
		}
		clump += n;
	}
	free(cis);

	if(failed)
		return TWID32;
	return clump - ncorrupt;
}
/*
 * initialize the external bucket sorting data structures.
 * creates 1<<bits buckets, each with an in-memory chunk of size
 * bytes, that will spill to part.  the chunk area is carved out of
 * xbuf, aligned up to a multiple of size (which assumes size is a
 * power of two).
 * returns nil, with the error string set, if allocation fails.
 */
static IEBucks*
initiebucks(Part *part, int bits, u32int size)
{
	IEBucks *ib;
	int i;

	ib = MKZ(IEBucks);
	if(ib == nil){
		seterr(EOk, "out of memory");
		return nil;
	}
	ib->bits = bits;
	ib->nbucks = 1 << bits;
	ib->size = size;
	/* leave room for the U32Size chain link, then round down to whole entries */
	ib->usable = (size - U32Size) / IEntrySize * IEntrySize;
	ib->bucks = MKNZ(IEBuck, ib->nbucks);
	if(ib->bucks == nil){
		seterr(EOk, "out of memory allocation sorting buckets");
		freeiebucks(ib);
		return nil;
	}
	/* one extra chunk of slack so the aligned area still holds nbucks chunks */
	ib->xbuf = MKN(u8int, size * ((1 << bits)+1));
	/*
	 * test the pointer that was actually allocated; the old code
	 * tested the derived pointer, which is only nil for a nil
	 * xbuf because of how the rounding happens to work out.
	 */
	if(ib->xbuf == nil){
		seterr(EOk, "out of memory allocating sorting buckets' buffers");
		freeiebucks(ib);
		return nil;
	}
	ib->buf = (u8int*)(((ulong)ib->xbuf+size-1)&~(ulong)(size-1));
	for(i = 0; i < ib->nbucks; i++){
		ib->bucks[i].head = TWID32;
		ib->bucks[i].buf = &ib->buf[i * size];
	}
	ib->part = part;
	return ib;
}
/*
 * release everything owned by ib; ib may be nil.
 * while xbuf is live, buf is an aligned pointer into the middle of
 * xbuf and must not be handed to free.  sortiebucks clears xbuf
 * when it replaces buf with an allocation of its own, so exactly
 * one of the two is freed here.
 */
static void
freeiebucks(IEBucks *ib)
{
	if(ib == nil)
		return;
	free(ib->bucks);
	if(ib->xbuf != nil)
		free(ib->xbuf);
	else
		free(ib->buf);
	free(ib);
}

/*
 * initial sort: put the entry into the correct bucket,
 * chosen by the leading ib->bits bits of its score.
 * the bucket is flushed to disk as soon as it cannot hold
 * another entry.  returns 0 on success, -1 on failure.
 */
static int
sprayientry(IEBucks *ib, IEntry *ie)
{
	u32int n;
	int b;

	b = hashbits(ie->score, ib->bits);
	n = ib->bucks[b].used;
	if(n + IEntrySize > ib->usable){
		/* should be flushed below, but if flush fails, this can happen */
		seterr(EOk, "out of space in bucket");
		return -1;
	}
	packientry(ie, &ib->bucks[b].buf[n]);
	n += IEntrySize;
	ib->bucks[b].used = n;
	if(n + IEntrySize <= ib->usable)
		return 0;
	return flushiebuck(ib, b, 1);
}

/*
 * finish sorting:
 * for each bucket, read it in and sort it,
 * and write out the final file.
 * returns the total number of entries written,
 * or TWID64 on failure.
 */
static u64int
sortiebucks(IEBucks *ib)
{
	u64int tot;
	u32int n;
	int i;

	if(flushiebucks(ib) < 0)
		return TWID64;
	for(i = 0; i < ib->nbucks; i++)
		ib->bucks[i].buf = nil;
	/* sorted output goes right after the chunks written so far */
	ib->off = (u64int)ib->chunks * ib->size;

	/*
	 * the spray buffers are finished.  clear both pointers so
	 * that freeiebucks never sees the freed xbuf or a pointer
	 * into it.
	 */
	free(ib->xbuf);
	ib->xbuf = nil;
	ib->buf = nil;
	if(0){
		fprint(2, "ib->max = %lld\n", ib->max);
		fprint(2, "ib->chunks = %ud\n", ib->chunks);
	}
	/* one buffer big enough for the largest bucket plus a trailing chain link */
	ib->buf = MKN(u8int, ib->max + U32Size);
	if(ib->buf == nil){
		seterr(EOk, "out of memory allocating final sorting buffer; try more buckets");
		return TWID64;
	}
	tot = 0;
	for(i = 0; i < ib->nbucks; i++){
		n = sortiebuck(ib, i);
		if(n == TWID32)
			return TWID64;
		if(n != ib->bucks[i].total/IEntrySize)
			fprint(2, "bucket %d changed count %d => %d\n",
				i, (int)(ib->bucks[i].total/IEntrySize), n);
		tot += n;
	}
	return tot;
}
/*
 * read bucket b of ib into memory, sort its entries with
 * ientrycmp and append them to the output at ib->off.
 * returns the number of entries written, or TWID32 on failure.
 */
static u32int
sortiebuck(IEBucks *ib, int b)
{
	u32int nent;

	nent = readiebuck(ib, b);
	if(nent == TWID32)
		return TWID32;

	qsort(ib->buf, nent, IEntrySize, ientrycmp);

	if(writepart(ib->part, ib->off, ib->buf, nent * IEntrySize) >= 0){
		ib->off += nent * IEntrySize;
		return nent;
	}
	seterr(EOk, "can't write sorted bucket: %r");
	return TWID32;
}
/*
 * write out the current chunk of bucket b, if it holds anything.
 * the previous head of the bucket's chain is stored right after
 * the entry data, the whole chunk is written as the next chunk of
 * the partition, and that chunk becomes the new head.
 * if reset is set the in-memory chunk is then marked empty.
 * returns 0 on success; a failed write aborts the program.
 */
static int
flushiebuck(IEBucks *ib, int b, int reset)
{
	IEBuck *bk;
	u32int tail;

	bk = &ib->bucks[b];
	if(bk->used == 0)
		return 0;

	/* link this chunk to the one flushed before it */
	tail = bk->used;
	U32PUT(&bk->buf[tail], bk->head);

	if(writepart(ib->part, (u64int)ib->chunks * ib->size, bk->buf, ib->size) < 0){
		seterr(EOk, "can't write sorting bucket to file: %r");
		xabort();
		return -1;
	}

	bk->head = ib->chunks++;
	bk->total += bk->used;
	if(reset)
		bk->used = 0;
	return 0;
}
/*
 * write out all of the buckets without resetting them, and
 * record in ib->max the largest byte total of any bucket.
 * returns 0 on success, -1 if a flush fails.
 */
static int
flushiebucks(IEBucks *ib)
{
	IEBuck *bk;
	int k;

	k = 0;
	while(k < ib->nbucks){
		if(flushiebuck(ib, k, 0) < 0)
			return -1;
		bk = &ib->bucks[k];
		if(bk->total > ib->max)
			ib->max = bk->total;
		k++;
	}
	return 0;
}
/*
 * read the chained chunks of bucket b into ib->buf, newest
 * chunk first, and return its total number of IEntries,
 * or TWID32 if a read fails.
 * the first chunk read is the last one written and holds
 * bucks[b].used bytes of data (a full chunk if used is 0);
 * every other chunk is full.  each read also fetches the
 * chain link that follows the data, which is then
 * overwritten by the next chunk.
 */
static u32int
readiebuck(IEBucks *ib, int b)
{
	IEBuck *bk;
	u32int chunk, len, nbytes;

	bk = &ib->bucks[b];
	chunk = bk->head;
	nbytes = 0;
	len = bk->used;
	if(len == 0)
		len = ib->usable;

	while(chunk != TWID32){
		if(readpart(ib->part, (u64int)chunk * ib->size, &ib->buf[nbytes], len + U32Size) < 0){
			seterr(EOk, "can't read index sort bucket: %r");
			return TWID32;
		}
		nbytes += len;
		/* the link to the previous chunk sits right after the data */
		chunk = U32GET(&ib->buf[nbytes]);
		len = ib->usable;
	}

	if(nbytes != bk->total)
		fprint(2, "\tbucket %d: expected %d entries, got %d\n",
			b, (int)bk->total/IEntrySize, nbytes/IEntrySize);
	return nbytes / IEntrySize;
}

212
src/cmd/venti/srv/stats.c Normal file
View File

@ -0,0 +1,212 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/* when zero, addstat and addstat2 return without updating any counter */
int collectstats = 1;
/*
 * names of the statistics counters, indexed by statistic number.
 * keep in sync with dat.h:/NStat
 */
Statdesc statdesc[NStat] =
{
	{ "rpc total", },
	{ "rpc reads", },
	{ "rpc reads ok", },
	{ "rpc reads failed", },
	{ "rpc read bytes", },
	{ "rpc read time", },
	{ "rpc read cached", },
	{ "rpc read cached time", },
	{ "rpc read uncached", },
	/* this name used to carry a stray trailing space (comma typed inside the quotes) */
	{ "rpc read uncached time", },
	{ "rpc writes", },
	{ "rpc writes new", },
	{ "rpc writes old", },
	{ "rpc writes failed", },
	{ "rpc write bytes", },
	{ "rpc write time", },
	{ "rpc write new time", },
	{ "rpc write old time", },
	{ "lump cache hits", },
	{ "lump cache misses", },
	{ "lump cache reads", },
	{ "lump cache writes", },
	{ "lump cache size", },
	{ "lump cache stall", },
	{ "lump cache read time", },
	{ "disk cache hits", },
	{ "disk cache misses", },
	{ "disk cache lookups", },
	{ "disk cache reads", },
	{ "disk cache writes", },
	{ "disk cache dirty", },
	{ "disk cache size", },
	{ "disk cache flushes", },
	{ "disk cache stalls", },
	{ "disk cache lookup time", },
	{ "disk block stalls", },
	{ "lump stalls", },
	{ "index cache hits", },
	{ "index cache misses", },
	{ "index cache reads", },
	{ "index cache writes", },
	{ "index cache fills", },
	{ "index cache prefetches", },
	{ "index cache dirty", },
	{ "index cache size", },
	{ "index cache flushes", },
	{ "index cache stalls", },
	{ "index cache read time", },
	{ "bloom filter hits", },
	{ "bloom filter misses", },
	{ "bloom filter false misses", },
	{ "bloom filter lookups", },
	{ "bloom filter ones", },
	{ "bloom filter bits", },
	{ "bloom filter lookup time", },
	{ "arena block reads", },
	{ "arena block read bytes", },
	{ "arena block writes", },
	{ "arena block write bytes", },
	{ "isect block reads", },
	{ "isect block read bytes", },
	{ "isect block writes", },
	{ "isect block write bytes", },
	{ "sum reads", },
	{ "sum read bytes", },
};
/* held by setstat, addstat and addstat2 while they update stats.n */
QLock statslock;
/* the live counters */
Stats stats;
/* ring of nstathist snapshots of stats, written by statsproc */
Stats *stathist;
int nstathist;
/* NOTE(review): statind is not referenced anywhere in the visible code */
ulong statind;
/* number of snapshots taken so far; stattime%nstathist is the next slot */
ulong stattime;
void
statsproc(void *v)
{
USED(v);
for(;;){
stats.now = time(0);
stathist[stattime%nstathist] = stats;
stattime++;
sleep(1000);
}
}
/* allocate the snapshot ring and start the sampling proc */
void
statsinit(void)
{
	enum { Nhist = 90000 };	/* number of snapshots kept */

	nstathist = Nhist;
	stathist = MKNZ(Stats, nstathist);
	vtproc(statsproc, nil);
}
void
setstat(int index, long val)
{
qlock(&statslock);
stats.n[index] = val;
qunlock(&statslock);
}
/* add inc to counter index under statslock; no-op when collectstats is 0 */
void
addstat(int index, int inc)
{
	if(collectstats){
		qlock(&statslock);
		stats.n[index] += inc;
		qunlock(&statslock);
	}
}
/*
 * add to two counters while holding statslock once;
 * no-op when collectstats is 0.
 */
void
addstat2(int index, int inc, int index1, int inc1)
{
	if(collectstats){
		qlock(&statslock);
		stats.n[index] += inc;
		stats.n[index1] += inc1;
		qunlock(&statslock);
	}
}
/* currently a no-op: the body is empty */
void
printstats(void)
{
}
/*
 * summarize the snapshot history between times t0 and t1 into nbin bins.
 * fn is called on each pair of consecutive snapshots (older first)
 * together with arg; its results are accumulated per bin as min, max,
 * average and sample count.
 * t0 and t1 that are <= 0 are taken relative to the time of the latest
 * snapshot; if t1 <= t0 the range becomes the ten minutes before t1.
 * an empty bin other than the first is filled with a copy of the
 * previous bin.
 */
void
binstats(long (*fn)(Stats *s0, Stats *s1, void *arg), void *arg,
long t0, long t1, Statbin *bin, int nbin)
{
long t, xt0, te, v;
int i, j, lo, hi, m, oj;
vlong tot;
Statbin *b;
t = stats.now;
/* negative times mean relative to now. */
if(t0 <= 0)
t0 += t;
if(t1 <= 0)
t1 += t;
/* ten minute range if none given */
if(t1 <= t0)
t0 = t1 - 60*10;
if(0) fprint(2, "stats %ld-%ld\n", t0, t1);
/*
 * binary search to find t0-1 or close.
 * indices run from stattime (the slot written longest ago) to
 * stattime+nstathist and are reduced mod nstathist on each access.
 */
lo = stattime;
hi = stattime+nstathist;
while(lo+1 < hi){
m = (lo+hi)/2;
if(stathist[m%nstathist].now >= t0)
hi = m;
else
lo = m;
}
xt0 = stathist[lo%nstathist].now;
if(0) fprint(2, "bsearch found %ld\n", xt0);
if(xt0 >= t1){
/* no samples */
memset(bin, 0, nbin*sizeof bin[0]);
return;
}
hi = stattime+nstathist;
te = t0;
j = lo+1;
for(i=0; i<nbin; i++){
t = te;
/* NOTE(review): with i starting at 0 the first bin's upper bound equals t0 - confirm whether (i+1) was intended */
te = t0 + (t1-t0)*i/nbin;
b = &bin[i];
memset(b, 0, sizeof *b);
tot = 0;
oj = j;
/* consume the snapshots that fall before this bin's upper bound */
for(; j<hi && stathist[j%nstathist].now<te; j++){
v = fn(&stathist[(j-1)%nstathist], &stathist[j%nstathist], arg);
if(b->nsamp==0 || v < b->min)
b->min = v;
if(b->nsamp==0 || v > b->max)
b->max = v;
tot += v;
b->nsamp++;
}
if(0) fprint(2, "bin%d: %ld to %ld; %d to %d - %d samples\n", i, t, te, oj, j, b->nsamp);
if(b->nsamp)
b->avg = tot / b->nsamp;
/* an empty bin repeats its predecessor */
if(b->nsamp==0 && i>0)
*b = bin[i-1];
}
}

View File

@ -0,0 +1,9 @@
#include <u.h>
#include <libc.h>
#include <venti.h>
#include <flate.h>
#include <libsec.h>
#include <thread.h>
#include <httpd.h>
#include <draw.h>
#include <memdraw.h>

View File

@ -0,0 +1,174 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
static int writeclumphead(Arena *arena, u64int aa, Clump *cl);
static int writeclumpmagic(Arena *arena, u64int aa, u32int magic);
/*
 * compare two clump directory entries field by field.
 * returns 0 if type, size, uncsize and score all match,
 * 1 otherwise.
 */
int
clumpinfocmp(ClumpInfo *c, ClumpInfo *d)
{
	if(c->type != d->type)
		return 1;
	if(c->size != d->size)
		return 1;
	if(c->uncsize != d->uncsize)
		return 1;
	return scorecmp(c->score, d->score) != 0;
}
/*
 * synchronize the clump info directory
 * with the clumps actually stored in the arena.
 * the directory should be at least as up to date
 * as the arena's trailer.
 *
 * starting at arena->memstats.used, walks the clumps found in the
 * arena, advancing arena->memstats past each one.
 * checks/updates at most n clumps.
 * if fix is set, bad clump headers, magic numbers and directory
 * entries are rewritten.  if zok is set, directory entries that
 * were never written are not reported (but still flagged).
 * if start is nonzero and the in-memory stats changed, the new
 * arena state is handed to setdcachestate.
 *
 * returns 0 if ok, a mask of Sync* flags if an error occurred
 */
int
syncarena(Arena *arena, u64int start, u32int n, int zok, int fix)
{
ZBlock *lump;
Clump cl;
ClumpInfo ci;
/* pattern compared against to recognize an unwritten directory entry */
static ClumpInfo zci = { .type = -1 };
u8int score[VtScoreSize];
u64int uncsize, used, aa;
u32int clump, clumps, cclumps, magic;
int err, flush, broken;
AState as;
/* remember the stats on entry so a change can be detected at the end */
used = arena->memstats.used;
clumps = arena->memstats.clumps;
cclumps = arena->memstats.cclumps;
uncsize = arena->memstats.uncsize;
trace(TraceProc, "syncarena start");
flush = 0;
err = 0;
for(; n; n--){
aa = arena->memstats.used;
clump = arena->memstats.clumps;
magic = clumpmagic(arena, aa);
/* free magic: no more clumps in the arena */
if(magic == ClumpFreeMagic)
break;
if(magic != arena->clumpmagic){
fprint(2, "%s: illegal clump magic number=%#8.8ux at clump=%d\n", arena->name, magic, clump);
/* err |= SyncDataErr; */
if(fix && writeclumpmagic(arena, aa, ClumpFreeMagic) < 0){
fprint(2, "can't write corrected clump free magic: %r");
err |= SyncFixErr;
}
break;
}
broken = 0;
lump = loadclump(arena, aa, 0, &cl, score, 0);
if(lump == nil){
fprint(2, "%s: clump=%d failed to read correctly: %r\n", arena->name, clump);
break;
/* NOTE(review): the next statement is unreachable after the break - confirm which of the two was meant */
err |= SyncDataErr;
}else if(cl.info.type != VtCorruptType){
/* recompute the score of the data and compare with the header */
scoremem(score, lump->data, cl.info.uncsize);
if(scorecmp(cl.info.score, score) != 0){
/* ignore partially written block */
if(cl.encoding == ClumpENone)
break;
fprint(2, "%s: clump=%d has mismatched score\n", arena->name, clump);
err |= SyncDataErr;
broken = 1;
/* NOTE(review): vttypevalid in this source returns the value of (type < VtMaxType), i.e. 0 or 1, so this "< 0" test can never be true - confirm intent */
}else if(vttypevalid(cl.info.type) < 0){
fprint(2, "%s: clump=%d has invalid type %d", arena->name, clump, cl.info.type);
err |= SyncDataErr;
broken = 1;
}
/* mark the bad clump as corrupt in its on-disk header */
if(broken && fix){
cl.info.type = VtCorruptType;
if(writeclumphead(arena, aa, &cl) < 0){
fprint(2, "%s: can't write corrected clump header: %r", arena->name);
err |= SyncFixErr;
}
}
}
freezblock(lump);
/* account for the clump just examined */
arena->memstats.used += ClumpSize + cl.info.size;
arena->memstats.clumps++;
/* check the directory entry against the clump header */
if(!broken && readclumpinfo(arena, clump, &ci)<0){
fprint(2, "%s: arena directory read failed\n", arena->name);
broken = 1;
}else if(!broken && clumpinfocmp(&ci, &cl.info)!=0){
if(clumpinfocmp(&ci, &zci) == 0){
err |= SyncCIZero;
if(!zok)
fprint(2, "%s: unwritten clump info for clump=%d\n", arena->name, clump);
}else{
err |= SyncCIErr;
fprint(2, "%s: bad clump info for clump=%d\n", arena->name, clump);
fprint(2, "\texpected score=%V type=%d size=%d uncsize=%d\n",
cl.info.score, cl.info.type, cl.info.size, cl.info.uncsize);
fprint(2, "\tfound score=%V type=%d size=%d uncsize=%d\n",
ci.score, ci.type, ci.size, ci.uncsize);
}
broken = 1;
}
/* rewrite the directory entry from the clump header */
if(broken && fix){
flush = 1;
ci = cl.info;
if(writeclumpinfo(arena, clump, &ci) < 0){
fprint(2, "%s: can't write correct clump directory: %r\n", arena->name);
err |= SyncFixErr;
}
}
trace(TraceProc, "syncarena unindexed clump %V %d", cl.info.score, arena->memstats.clumps);
arena->memstats.uncsize += cl.info.uncsize;
/* a clump stored in fewer bytes than its data counts as compressed */
if(cl.info.size < cl.info.uncsize)
arena->memstats.cclumps++;
}
if(flush){
trace(TraceProc, "syncarena flush");
arena->wtime = now();
if(arena->ctime == 0 && arena->memstats.clumps)
arena->ctime = arena->wtime;
flushdcache();
}
/* any change in the stats means the arena header is out of date */
if(used != arena->memstats.used
|| clumps != arena->memstats.clumps
|| cclumps != arena->memstats.cclumps
|| uncsize != arena->memstats.uncsize)
err |= SyncHeader;
if(start && (err&SyncHeader)){
trace(TraceProc, "syncarena setdcachestate");
as.arena = arena;
as.aa = start+arena->memstats.used;
as.stats = arena->memstats;
setdcachestate(&as);
}
return err;
}
/*
 * pack the header of cl and write it to the arena at address aa.
 * returns 0 on success, -1 if the buffer cannot be allocated,
 * the header cannot be packed, or the write is short.
 */
static int
writeclumphead(Arena *arena, u64int aa, Clump *cl)
{
	ZBlock *zb;
	int rv;

	zb = alloczblock(ClumpSize, 0, arena->blocksize);
	if(zb == nil)
		return -1;

	rv = 0;
	if(packclump(cl, zb->data, arena->clumpmagic) < 0)
		rv = -1;
	else if(writearena(arena, aa, zb->data, ClumpSize) != ClumpSize)
		rv = -1;

	freezblock(zb);
	return rv;
}
/*
 * write the clump magic number magic to the arena at address aa.
 * returns 0 on success and -1 if the write is short or fails,
 * matching writeclumphead and the "< 0" test made by the caller.
 * (it used to return the truth value of the comparison, so a
 * failed write was never seen as an error.)
 */
static int
writeclumpmagic(Arena *arena, u64int aa, u32int magic)
{
	u8int buf[U32Size];

	packmagic(magic, buf);
	if(writearena(arena, aa, buf, U32Size) != U32Size)
		return -1;
	return 0;
}

View File

@ -0,0 +1,73 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/* incremented by each -v; when set, threadmain prints the index before syncing */
static int verbose;
/*
 * print the command synopsis on standard error and exit with
 * status "usage".  the synopsis now lists -I, which threadmain
 * accepts but the old text left out.
 */
void
usage(void)
{
	fprint(2, "usage: syncindex [-fv] [-B blockcachesize] [-I indexcachesize] config\n");
	threadexitsall("usage");
}
/* configuration handed to initventi by threadmain */
Config conf;
/*
 * entry point: parse the options, initialize venti from the
 * config file (read-only unless -f was given), set up the disk
 * block, lump and index caches, and bring the index in line
 * with the arenas.  exits through sysfatal if the sync fails.
 */
void
threadmain(int argc, char *argv[])
{
	u32int dcbytes, ic;
	int dofix;

	dofix = 0;
	dcbytes = 0;
	ic = 0;
	ARGBEGIN{
	case 'B':
		dcbytes = unittoull(EARGF(usage()));
		break;
	case 'I':
		ic = unittoull(EARGF(usage()));
		break;
	case 'f':
		dofix++;
		break;
	case 'v':
		verbose++;
		break;
	default:
		usage();
		break;
	}ARGEND

	if(dofix == 0)
		readonly = 1;

	if(argc != 1)
		usage();

	if(initventi(argv[0], &conf) < 0)
		sysfatal("can't init venti: %r");

	/* raise the cache size to the floor computed from the index layout */
	if(dcbytes < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
		dcbytes = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
	if(0) fprint(2, "initialize %d bytes of disk block cache\n", dcbytes);
	initdcache(dcbytes);
	initlumpcache(1*1024*1024, 1024/8);

	/* turn the byte budget into a log2 size for initicache, at least 4 */
	ic = u64log2(ic / (sizeof(IEntry)+sizeof(IEntry*)) / ICacheDepth);
	if(ic < 4)
		ic = 4;
	fprint(2, "initialize %d bytes of index cache for %d index entries\n",
		(sizeof(IEntry)+sizeof(IEntry*)) * (1 << ic) * ICacheDepth,
		(1 << ic) * ICacheDepth);
	initicache(ic, ICacheDepth);
	initicachewrite();

	if(mainindex->bloom)
		startbloomproc(mainindex->bloom);

	if(verbose)
		printindex(2, mainindex);

	if(syncindex(mainindex, dofix, 1, 0) < 0)
		sysfatal("failed to sync index=%s: %r\n", mainindex->name);

	threadexitsall(0);
}

View File

@ -0,0 +1,167 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
enum
{
/* number of clump directory entries syncarenaindex handles per batch */
ClumpChunks = 32*1024
};
/* running counts kept by syncarenaindex and reported by syncindex */
static int missing, wrong;
/*
 * sort the index array s (n entries) so that ci[s[0]], ci[s[1]], ...
 * are in increasing score order; ci itself is not moved.
 *
 * shell sort is plenty good enough
 * because we're going to do a bunch of disk i/o's
 */
static void
sortclumpinfo(ClumpInfo *ci, int *s, int n)
{
int i, j, m, t;
/* m is the gap; it shrinks by roughly a third on each pass */
for(m = (n + 3) / 5; m > 0; m = (m + 1) / 3){
for(i = n - m; i-- > 0;){
/* move s[i] up in steps of m until the entry after it is not smaller */
for(j = i + m; j < n; j += m){
if(memcmp(ci[s[j - m]].score, ci[s[j]].score, VtScoreSize) <= 0)
break;
t = s[j];
s[j] = s[j - m];
s[j - m] = t;
}
}
}
}
/*
 * make sure the index ix has an entry for every clump of arena,
 * starting with directory entry number clump, whose index address is a.
 * the directory is read in batches of up to ClumpChunks entries and
 * each batch is processed in score order.
 * if check is set, each clump is looked up first and only missing or
 * mismatched entries are handled; otherwise every clump is handled.
 * handling means inserting the entry if fix is set, and otherwise
 * making the return value -1.
 * *pflush is set to 1 if anything was inserted.
 * returns 0 if ok, -1 on a read failure or an unfixed entry.
 */
int
syncarenaindex(Index *ix, Arena *arena, u32int clump, u64int a, int fix, int *pflush, int check)
{
Packet *pack;
IEntry ie;
IAddr ia;
ClumpInfo *ci, *cis;
u64int *addrs;
int i, n, ok, *s, flush;
trace(TraceProc, "syncarenaindex enter");
flush = 0;
cis = MKN(ClumpInfo, ClumpChunks);
addrs = MKN(u64int, ClumpChunks);
s = MKN(int, ClumpChunks);
ok = 0;
for(; clump < arena->memstats.clumps; clump += n){
n = ClumpChunks;
if(n > arena->memstats.clumps - clump)
n = arena->memstats.clumps - clump;
n = readclumpinfos(arena, clump, cis, n);
if(n <= 0){
fprint(2, "arena directory read failed\n");
ok = -1;
break;
}
/* record each clump's address before sorting destroys the order */
for(i = 0; i < n; i++){
addrs[i] = a;
a += cis[i].size + ClumpSize;
s[i] = i;
}
sortclumpinfo(cis, s, n);
for(i = 0; i < n; i++){
ci = &cis[s[i]];
ia.type = ci->type;
ia.size = ci->uncsize;
ia.addr = addrs[s[i]];
/* clump length rounded up to whole address blocks */
ia.blocks = (ci->size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
if(!check)
goto Add;
if(loadientry(ix, ci->score, ci->type, &ie) < 0){
trace(TraceProc, "syncarenaindex missing block %V.%d", ci->score, ci->type);
missing++;
if(0) fprint(2, "missing block type=%d score=%V\n", ci->type, ci->score);
}else if(iaddrcmp(&ia, &ie.ia) != 0){
trace(TraceProc, "syncarenaindex mismatched entry");
/* NOTE(review): i is a position in the sorted batch here, so clump + i is not necessarily this clump's number - confirm */
fprint(2, "\nmismatched index entry and clump at %d\n", clump + i);
fprint(2, "\tclump: type=%d size=%d blocks=%d addr=%lld\n", ia.type, ia.size, ia.blocks, ia.addr);
fprint(2, "\tindex: type=%d size=%d block=%d addr=%lld\n", ie.ia.type, ie.ia.size, ie.ia.blocks, ie.ia.addr);
/* if the indexed copy can still be read, this clump is just a duplicate */
pack = readlump(ie.score, ie.ia.type, ie.ia.size, nil);
packetfree(pack);
if(pack != nil){
fprint(2, "duplicated lump\n");
continue;
}
wrong++;
}else
continue;
Add:
if(!fix){
ok = -1;
continue;
}
flush = 1;
trace(TraceProc, "syncarenaindex insert %V", ci->score);
insertscore(ci->score, &ia, 1);
}
if(0 && clump / 1000 != (clump + n) / 1000)
fprint(2, ".");
}
free(cis);
free(addrs);
free(s);
if(flush){
flushdcache();
*pflush = 1;
}
return ok;
}
/*
 * bring the index ix in line with its arenas.
 * for each arena, syncarena first walks the clumps beyond the recorded
 * arena state; if that leaves no unmasked error, syncarenaindex
 * processes the clumps found by that walk.
 * fix allows repairs, check is passed through to syncarenaindex, and
 * mustflush forces the index cache to be flushed before returning
 * rather than just kicked.
 * returns 0 if ok, nonzero (negative) if something was wrong or the
 * index header could not be written back.
 */
int
syncindex(Index *ix, int fix, int mustflush, int check)
{
Arena *arena;
u64int a;
u32int clump;
int i, e, e1, ok, ok1, flush;
ok = 0;
flush = 0;
for(i = 0; i < ix->narenas; i++){
trace(TraceProc, "syncindex start %d", i);
arena = ix->arenas[i];
/* remember where the arena stood before syncarena advances it */
clump = arena->memstats.clumps;
a = arena->memstats.used;
e = syncarena(arena, ix->amap[i].start, TWID32, fix, fix);
e1 = e;
/* when fixing, these conditions do not count as errors */
if(fix)
e1 &= ~(SyncHeader|SyncCIZero|SyncCIErr);
if(e1 == SyncHeader)
fprint(2, "arena %s: header is out-of-date\n", arena->name);
if(e1)
ok = -1;
else{
ok1 = syncarenaindex(ix, arena, clump, a + ix->amap[i].start, fix, &flush, check);
if(ok1 < 0)
fprint(2, "syncarenaindex: %r\n");
/* write the arena header back only once the index has caught up */
if(fix && ok1==0 && (e & SyncHeader) && wbarena(arena) < 0)
fprint(2, "arena=%s header write failed: %r\n", arena->name);
ok |= ok1;
}
}
if(missing || wrong)
fprint(2, "syncindex: %d missing entries, %d wrong entries (flush=%d)\n", missing, wrong, flush);
if(fix && wbindex(ix) < 0){
fprint(2, "can't write back index header for %s: %r\n", ix->name);
return -1;
}
if(fix && flush){
flushdcache();
if(mustflush){
flushicache();
flushdcache();
}else
kickicache();
}
return ok;
}

38
src/cmd/venti/srv/trace.c Normal file
View File

@ -0,0 +1,38 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/* log names passed as the level argument of trace() */
char TraceDisk[] = "disk";
char TraceLump[] = "lump";
char TraceBlock[] = "block";
char TraceProc[] = "proc";
char TraceWork[] = "work";
char TraceQuiet[] = "quiet";
char TraceRpc[] = "rpc";
/*
 * format a message (at most 511 bytes) and append it, tagged with
 * the current thread's name, both to the log named level and to
 * the log "all".  does nothing when level is nil or ventilogging
 * is off.
 */
void
trace(char *level, char *fmt, ...)
{
	char msg[512];
	va_list ap;

	if(level != nil && ventilogging){
		va_start(ap, fmt);
		vsnprint(msg, sizeof msg, fmt, ap);
		va_end(ap);

		vtlog(level, "<font size=-1>%T %s:</font> %s<br>\n",
			threadgetname(), msg);
		vtlog("all", "<font size=-1>%T <font color=#777777>%s</font> %s:</font> %s<br>\n",
			level, threadgetname(), msg);
	}
}
/* currently a no-op: the body is empty */
void
traceinit(void)
{
}
/* currently a no-op: the argument is ignored */
void
settrace(char *trace)
{
}

View File

@ -0,0 +1,30 @@
#include "stdinc.h"
#define TWID64 ((u64int)~(u64int)0)

/*
 * convert a number with an optional binary unit suffix to a count:
 * k/K, m/M, g/G and t/T multiply by 2^10, 2^20, 2^30 and 2^40.
 * the number is parsed by strtoul with base 0, so 0x and 0
 * prefixes are accepted.
 * returns TWID64 if s is nil or anything follows the number and
 * its suffix.
 * the t/T case used to leave the suffix unconsumed, so every
 * terabyte value was rejected.
 * NOTE(review): strtoul parses into an unsigned long, so the range
 * of the number before scaling depends on the platform.
 */
u64int
unittoull(char *s)
{
	char *es;
	u64int n;

	if(s == nil)
		return TWID64;
	n = strtoul(s, &es, 0);
	if(*es == 'k' || *es == 'K'){
		n *= 1024;
		es++;
	}else if(*es == 'm' || *es == 'M'){
		n *= 1024*1024;
		es++;
	}else if(*es == 'g' || *es == 'G'){
		n *= 1024*1024*1024;
		es++;
	}else if(*es == 't' || *es == 'T'){
		/* two steps: 1024*1024*1024*1024 would overflow int arithmetic */
		n *= 1024*1024;
		n *= 1024*1024;
		es++;
	}
	if(*es != '\0')
		return TWID64;
	return n;
}

179
src/cmd/venti/srv/unwhack.c Normal file
View File

@ -0,0 +1,179 @@
#include "stdinc.h"
#include "whack.h"
/* parameters of the length encoding decoded by unwhack */
enum
{
DMaxFastLen = 7, /* length a big-length decode starts from */
DBigLenCode = 0x3c, /* minimum code for large length encoding */
DBigLenBits = 6, /* bits in the initial big-length code */
DBigLenBase = 1 /* starting items to encode for big lens */
};
/*
 * indexed by the next 5 bits of input:
 * 0 means a literal follows, 255 means a big-length code,
 * anything else is the match length itself.
 */
static uchar lenval[1 << (DBigLenBits - 1)] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4,
5,
6,
255,
255
};
/* number of input bits consumed by the code for each short match length */
static uchar lenbits[] =
{
0, 0, 0,
2, 3, 5, 5,
};
/* indexed by the 4-bit offset class: number of extra offset bits that follow */
static uchar offbits[16] =
{
5, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 12, 13
};
/* indexed by the 4-bit offset class: base value the extra bits are or'ed into */
static ushort offbase[16] =
{
0, 0x20,
0x40, 0x60,
0x80, 0xc0,
0x100, 0x180,
0x200, 0x300,
0x400, 0x600,
0x800, 0xc00,
0x1000,
0x2000
};
/* prepare uw for a call to unwhack by emptying its error string */
void
unwhackinit(Unwhack *uw)
{
	*uw->err = '\0';
}
/*
 * decompress the nsrc bytes at src into dst, which has room for
 * ndst bytes.
 * returns the number of bytes produced, or -1 with a message in
 * uw->err if the output does not fit or the input is malformed.
 */
int
unwhack(Unwhack *uw, uchar *dst, int ndst, uchar *src, int nsrc)
{
uchar *s, *d, *dmax, *smax, lit;
ulong uwbits, lithist;
int i, off, len, bits, use, code, uwnbits, overbits;
d = dst;
dmax = d + ndst;
smax = src + nsrc;
/* uwbits holds the unread input bits; the low uwnbits of it are valid */
uwnbits = 0;
uwbits = 0;
/* overbits counts padding bits shifted in after the input ran out */
overbits = 0;
/* one bit per recent literal, set when it was < 32 or > 127 */
lithist = ~0;
while(src < smax || uwnbits - overbits >= MinDecode){
/* refill the bit buffer until it holds more than 24 bits */
while(uwnbits <= 24){
uwbits <<= 8;
if(src < smax)
uwbits |= *src++;
else
overbits += 8;
uwnbits += 8;
}
/*
 * literal: a zero entry in lenval for the next 5 bits
 */
len = lenval[(uwbits >> (uwnbits - 5)) & 0x1f];
if(len == 0){
/* one of the last four literals was < 32 or > 127: 9-bit code, low 8 bits are the byte */
if(lithist & 0xf){
uwnbits -= 9;
lit = (uwbits >> uwnbits) & 0xff;
lit &= 255;
}else{
/* otherwise an 8-bit code with a 7-bit value; values < 32 take 2 or 3 more bits */
uwnbits -= 8;
lit = (uwbits >> uwnbits) & 0x7f;
if(lit < 32){
if(lit < 24){
uwnbits -= 2;
lit = (lit << 2) | ((uwbits >> uwnbits) & 3);
}else{
uwnbits -= 3;
lit = (lit << 3) | ((uwbits >> uwnbits) & 7);
}
lit = (lit - 64) & 0xff;
}
}
if(d >= dmax){
snprint(uw->err, WhackErrLen, "too much output");
return -1;
}
*d++ = lit;
lithist = (lithist << 1) | (lit < 32) | (lit > 127);
continue;
}
/*
 * length: short lengths come straight from the tables;
 * 255 introduces a variable-length code
 */
if(len < 255)
uwnbits -= lenbits[len];
else{
uwnbits -= DBigLenBits;
code = ((uwbits >> uwnbits) & ((1 << DBigLenBits) - 1)) - DBigLenCode;
len = DMaxFastLen;
use = DBigLenBase;
bits = (DBigLenBits & 1) ^ 1;
/* pull in one more bit per round; use doubles on every other round */
while(code >= use){
len += use;
code -= use;
code <<= 1;
uwnbits--;
if(uwnbits < 0){
snprint(uw->err, WhackErrLen, "len out of range");
return -1;
}
code |= (uwbits >> uwnbits) & 1;
use <<= bits;
bits ^= 1;
}
len += code;
/* refill again before the offset is read */
while(uwnbits <= 24){
uwbits <<= 8;
if(src < smax)
uwbits |= *src++;
else
overbits += 8;
uwnbits += 8;
}
}
/*
 * offset: a 4-bit class selects a base and a count of extra bits
 */
uwnbits -= 4;
bits = (uwbits >> uwnbits) & 0xf;
off = offbase[bits];
bits = offbits[bits];
uwnbits -= bits;
off |= (uwbits >> uwnbits) & ((1 << bits) - 1);
off++;
/* the match must start inside the output produced so far */
if(off > d - dst){
snprint(uw->err, WhackErrLen, "offset out of range: off=%d d=%ld len=%d nbits=%d", off, d - dst, len, uwnbits);
return -1;
}
if(d + len > dmax){
snprint(uw->err, WhackErrLen, "len out of range");
return -1;
}
/* copy byte by byte: source and destination may overlap when off < len */
s = d - off;
for(i = 0; i < len; i++)
d[i] = s[i];
d += len;
}
/* fewer bits left than were padded in: real codes consumed padding */
if(uwnbits < overbits){
snprint(uw->err, WhackErrLen, "compressed data overrun");
return -1;
}
len = d - dst;
return len;
}

252
src/cmd/venti/srv/utils.c Normal file
View File

@ -0,0 +1,252 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/* compare two names, looking at no more than ANameSize bytes; result as for strncmp */
int
namecmp(char *s, char *t)
{
	int order;

	order = strncmp(s, t, ANameSize);
	return order;
}
/*
 * copy the name src into dst, an ANameSize-byte buffer:
 * at most ANameSize-1 bytes are taken from src and the
 * result is always NUL-terminated.
 */
void
namecp(char *dst, char *src)
{
	enum { Last = ANameSize - 1 };

	strncpy(dst, src, Last);
	dst[Last] = '\0';
}
/*
 * check a name: it must be non-nil, shorter than ANameSize+1
 * characters, and consist only of characters in the range
 * ' ' .. 0x7e.  returns 0 if the name is acceptable, -1 if not.
 */
int
nameok(char *name)
{
	char *p;
	int ch;

	if(name == nil)
		return -1;
	p = name;
	while((ch = *p) != 0){
		if(p - name >= ANameSize)
			return -1;
		if(ch < ' ' || ch >= 0x7f)
			return -1;
		p++;
	}
	return 0;
}
/*
 * parse the leading decimal digits of s into *r.
 * returns -1 (leaving *r alone) if the value overflows 32 bits.
 * otherwise stores the value and returns 1 if s was nothing but
 * one or more digits, 0 if it was empty or something follows
 * the digits.
 */
int
stru32int(char *s, u32int *r)
{
	char *p;
	u32int acc, next, limit;
	int ch;

	limit = TWID32 / 10;
	acc = 0;
	p = s;
	while((ch = *p) >= '0' && ch <= '9'){
		/* acc*10 would not fit */
		if(acc > limit)
			return -1;
		next = acc * 10 + ch - '0';
		/* adding the digit wrapped around */
		if(next < acc)
			return -1;
		acc = next;
		p++;
	}
	*r = acc;
	return s != p && *p == '\0';
}
/*
 * parse the leading decimal digits of s into *r.
 * returns -1 (leaving *r alone) if the value overflows 64 bits.
 * otherwise stores the value and returns 1 if s was nothing but
 * one or more digits, 0 if it was empty or something follows
 * the digits.
 */
int
stru64int(char *s, u64int *r)
{
	char *p;
	u64int acc, next, limit;
	int ch;

	limit = TWID64 / 10;
	acc = 0;
	p = s;
	while((ch = *p) >= '0' && ch <= '9'){
		/* acc*10 would not fit */
		if(acc > limit)
			return -1;
		next = acc * 10 + ch - '0';
		/* adding the digit wrapped around */
		if(next < acc)
			return -1;
		acc = next;
		p++;
	}
	*r = acc;
	return s != p && *p == '\0';
}
/*
 * returns 1 if type is below VtMaxType, 0 otherwise.
 * the result is a truth value, never negative.
 * NOTE(review): negative types are accepted too - confirm intended.
 */
int
vttypevalid(int type)
{
	if(type < VtMaxType)
		return 1;
	return 0;
}
/*
 * format a message and write it to standard error together with its
 * severity, prefixed by the program name when one is known.
 * returns the formatted message, allocated by vsmprint; the caller
 * must free it.  returns nil (and prints nothing) if formatting fails.
 */
static char*
logit(int severity, char *fmt, va_list args)
{
	char *s;

	s = vsmprint(fmt, args);
	if(s == nil)
		return nil;
	/* only use argv0 as a prefix when it is actually set */
	if(argv0 != nil)
		fprint(2, "%s: err %d: %s\n", argv0, severity, s);
	else
		fprint(2, "err %d: %s\n", severity, s);
	return s;
}
/*
 * log a formatted message through logit and install the same text
 * as the error string with werrstr.  if the message cannot be
 * formatted, the error string becomes "error setting error".
 */
void
seterr(int severity, char *fmt, ...)
{
	char *msg;
	va_list ap;

	va_start(ap, fmt);
	msg = logit(severity, fmt, ap);
	va_end(ap);
	if(msg != nil){
		werrstr("%s", msg);
		free(msg);
		return;
	}
	werrstr("error setting error");
}
/*
 * log a formatted message through logit without changing the
 * error string; the formatted copy is released before returning.
 */
void
logerr(int severity, char *fmt, ...)
{
	va_list ap;
	char *msg;

	va_start(ap, fmt);
	msg = logit(severity, fmt, ap);
	va_end(ap);
	free(msg);
}
/*
 * current time as reported by time(nil), narrowed to 32 bits.
 */
u32int
now(void)
{
	u32int t;

	t = time(nil);
	return t;
}
/* when nonzero, emalloc, ezmalloc and erealloc call abort() on allocation failure instead of reaching sysfatal */
int abortonmem = 1;
/*
 * allocate n bytes or die trying.
 * the new memory is filled with the byte 0xa5 so that use of
 * uninitialized contents is easy to spot.
 * on failure: abort() if abortonmem is set, otherwise sysfatal.
 */
void *
emalloc(ulong n)
{
	void *mem;

	mem = malloc(n);
	if(mem != nil){
		memset(mem, 0xa5, n);
		if(0)print("emalloc %p-%p by %lux\n", mem, (char*)mem+n, getcallerpc(&n));
		return mem;
	}
	if(abortonmem)
		abort();
	sysfatal("out of memory allocating %lud", n);
	return nil;	/* not reached */
}
/*
 * allocate n bytes of zeroed memory or die trying.
 * on failure: abort() if abortonmem is set, otherwise sysfatal.
 */
void *
ezmalloc(ulong n)
{
	void *mem;

	mem = malloc(n);
	if(mem != nil){
		memset(mem, 0, n);
		if(0)print("ezmalloc %p-%p by %lux\n", mem, (char*)mem+n, getcallerpc(&n));
		return mem;
	}
	if(abortonmem)
		abort();
	sysfatal("out of memory allocating %lud", n);
	return nil;	/* not reached */
}
/*
 * resize the allocation at p to n bytes or die trying.
 * on failure: abort() if abortonmem is set, otherwise sysfatal.
 */
void *
erealloc(void *p, ulong n)
{
	p = realloc(p, n);
	if(p != nil){
		if(0)print("erealloc %p-%p by %lux\n", p, (char*)p+n, getcallerpc(&p));
		return p;
	}
	if(abortonmem)
		abort();
	sysfatal("out of memory allocating %lud", n);
	if(0)print("erealloc %p-%p by %lux\n", p, (char*)p+n, getcallerpc(&p));
	return p;	/* not reached */
}
/*
 * duplicate the string s, terminator included, into memory obtained
 * from emalloc (so allocation failure never returns).
 */
char *
estrdup(char *s)
{
	char *copy;
	int nbytes;

	nbytes = strlen(s) + 1;
	copy = emalloc(nbytes);
	memmove(copy, s, nbytes);
	if(0)print("estrdup %p-%p by %lux\n", copy, (char*)copy+nbytes, getcallerpc(&s));
	return copy;
}
/*
 * return floor(log2(v)): the smallest shift i such that v >> i
 * is 0 or 1.  v == 0 yields 0.
 */
int
u64log2(u64int v)
{
	int shift;

	shift = 0;
	while(shift < 64 && (v >> shift) > 1)
		shift++;
	return shift;
}
/*
 * start fn(arg) in a new proc with a 256K stack.
 * always returns 0; the result of proccreate is not examined.
 */
int
vtproc(void (*fn)(void*), void *arg)
{
	enum { ProcStack = 256*1024 };

	proccreate(fn, arg, ProcStack);
	return 0;
}
/*
 * print-format routine for an IEntry* argument: prints the score
 * followed by the address, type, size and block count from its ia field.
 * installed as %I by ventifmtinstall.
 */
int
ientryfmt(Fmt *fmt)
{
	IEntry *entry;
	int r;

	entry = va_arg(fmt->args, IEntry*);
	r = fmtprint(fmt, "%V %22lld %3d %5d %3d",
		entry->score,
		entry->ia.addr,
		entry->ia.type,
		entry->ia.size,
		entry->ia.blocks);
	return r;
}
void
ventifmtinstall(void)
{
fmtinstall('F', vtfcallfmt);
fmtinstall('H', encodefmt);
fmtinstall('I', ientryfmt);
fmtinstall('V', vtscorefmt);
}
/*
 * the value of nsec() divided by one million, i.e. a millisecond
 * counter, truncated to uint.
 */
uint
msec(void)
{
	enum { NsPerMs = 1000000 };

	return nsec()/NsPerMs;
}
/*
 * count the one bits in the low 32 bits of n by summing adjacent
 * fields of width 1, 2, 4, 8 and 16 in turn.
 */
uint
countbits(uint n)
{
	static uint mask[] = {
		0x55555555,
		0x33333333,
		0x0F0F0F0F,
		0x00FF00FF,
		0x0000FFFF,
	};
	int step;

	for(step = 0; step < 5; step++)
		n = (n & mask[step]) + ((n >> (1 << step)) & mask[step]);
	return n;
}

266
src/cmd/venti/srv/venti.c Normal file
View File

@ -0,0 +1,266 @@
#ifdef PLAN9PORT
#include <u.h>
#include <signal.h>
#endif
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
#include "whack.h"
/* set to 1 by the -d flag */
int debug;
/* set to 1 by -d or -s: skip rfork(RFNOTEG) and run ventiserver in the main thread */
int nofork;
/* not referenced in this file; presumably read by the thread library to size the main stack - confirm */
int mainstacksize = 256*1024;
/* listener returned by vtlisten in threadmain; ventiserver takes requests from it */
VtSrv *ventisrv;
static void ventiserver(void*);
/*
 * print the command-line synopsis and exit with status "usage".
 * the flag list mirrors the cases handled by ARGBEGIN in threadmain.
 */
void
usage(void)
{
	fprint(2, "usage: venti [-dLs] [-a ventiaddress] [-B blockcachesize] [-c config] [-C cachesize] [-D trace] [-h httpaddress] [-I icachesize] [-W webroot]\n");
	threadexitsall("usage");
}
/*
 * entry point of the venti server.
 * parses the command line, loads the configuration file, sizes and
 * initializes the lump, index and disk block caches, syncs the index,
 * announces the service and finally runs ventiserver.
 * progress is reported on standard error as a single "venti: ..." line.
 */
void
threadmain(int argc, char *argv[])
{
char *configfile, *haddr, *vaddr, *webroot;
u32int mem, icmem, bcmem, minbcmem;
Config config;
traceinit();
threadsetname("main");
/* nil or 0 here means "not given on the command line"; filled from the config below */
vaddr = nil;
haddr = nil;
configfile = nil;
webroot = nil;
mem = 0;
icmem = 0;
bcmem = 0;
ARGBEGIN{
case 'a':
vaddr = EARGF(usage());
break;
case 'B':
bcmem = unittoull(EARGF(usage()));
break;
case 'c':
configfile = EARGF(usage());
break;
case 'C':
mem = unittoull(EARGF(usage()));
break;
case 'D':
settrace(EARGF(usage()));
break;
case 'd':
/* debugging also implies staying in the foreground */
debug = 1;
nofork = 1;
break;
case 'h':
haddr = EARGF(usage());
break;
case 'I':
icmem = unittoull(EARGF(usage()));
break;
case 'L':
ventilogging = 1;
break;
case 's':
nofork = 1;
break;
case 'W':
webroot = EARGF(usage());
break;
default:
usage();
}ARGEND
/* no positional arguments are accepted */
if(argc)
usage();
if(!nofork)
rfork(RFNOTEG);
#ifdef PLAN9PORT
{
/* sigh - needed to avoid signals when writing to hungup networks */
struct sigaction sa;
memset(&sa, 0, sizeof sa);
sa.sa_handler = SIG_IGN;
sigaction(SIGPIPE, &sa, nil);
}
#endif
trace(TraceQuiet, "venti started");
fprint(2, "venti: ");
ventifmtinstall();
if(configfile == nil)
configfile = "venti.conf";
/* failure to start the arena summing process is reported but not fatal */
if(initarenasum() < 0)
fprint(2, "warning: can't initialize arena summing process: %r");
fprint(2, "conf...");
if(initventi(configfile, &config) < 0)
sysfatal("can't init server: %r");
/* anything not set by a flag falls back to the value from the config file */
if(mem == 0)
mem = config.mem;
if(bcmem == 0)
bcmem = config.bcmem;
if(icmem == 0)
icmem = config.icmem;
if(haddr == nil)
haddr = config.haddr;
if(vaddr == nil)
vaddr = config.vaddr;
if(vaddr == nil)
vaddr = "tcp!*!venti";
if(webroot == nil)
webroot = config.webroot;
if(queuewrites == 0)
queuewrites = config.queuewrites;
/* the http server is optional; a failure to start it is only a warning */
if(haddr){
fprint(2, "httpd %s...", haddr);
if(httpdinit(haddr, webroot) < 0)
fprint(2, "warning: can't start http server: %r");
}
fprint(2, "init...");
/* NOTE(review): 0xffffffff looks like a "size not specified" sentinel from the config - confirm; it selects a 1MB lump cache */
if(mem == 0xffffffffUL)
mem = 1 * 1024 * 1024;
if(0) fprint(2, "initialize %d bytes of lump cache for %d lumps\n",
mem, mem / (8 * 1024));
/* one lump cache slot per 8K of cache memory */
initlumpcache(mem, mem / (8 * 1024));
/* turn the index cache byte budget into a log2 value (at least 4) for initicache */
icmem = u64log2(icmem / (sizeof(IEntry)+sizeof(IEntry*)) / ICacheDepth);
if(icmem < 4)
icmem = 4;
if(0) fprint(2, "initialize %d bytes of index cache for %d index entries\n",
(sizeof(IEntry)+sizeof(IEntry*)) * (1 << icmem) * ICacheDepth,
(1 << icmem) * ICacheDepth);
initicache(icmem, ICacheDepth);
initicachewrite();
/*
 * need a block for every arena and every process
 */
minbcmem = maxblocksize *
(mainindex->narenas + mainindex->nsects*4 + 16);
/* a smaller request is silently raised to the minimum */
if(bcmem < minbcmem)
bcmem = minbcmem;
if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
initdcache(bcmem);
if(mainindex->bloom)
startbloomproc(mainindex->bloom);
fprint(2, "sync...");
if(syncindex(mainindex, 1, 0, 0) < 0)
sysfatal("can't sync server: %r");
/* write queueing is dropped, not fatal, if the queues cannot be set up */
if(queuewrites){
fprint(2, "queue...");
if(initlumpqueues(mainindex->nsects) < 0){
fprint(2, "can't initialize lump queues,"
" disabling write queueing: %r");
queuewrites = 0;
}
}
fprint(2, "announce %s...", vaddr);
ventisrv = vtlisten(vaddr);
if(ventisrv == nil)
sysfatal("can't announce %s: %r", vaddr);
fprint(2, "serving.\n");
/* with -d or -s serve from this thread; otherwise serve from a new proc */
if(nofork)
ventiserver(nil);
else
vtproc(ventiserver, nil);
}
/*
 * turn the reply of r into an error reply carrying a private copy
 * (made with estrdup) of the given message.
 */
static void
vtrerror(VtReq *r, char *error)
{
	char *copy;

	r->rx.msgtype = VtRerror;
	copy = estrdup(error);
	r->rx.error = copy;
}
/*
 * request loop of the server.
 * takes requests from ventisrv one at a time, handles read, write and
 * sync, records statistics, and sends the reply with vtrespond.
 * when vtgetreq returns nil the disk and index caches are flushed and
 * the whole program exits.  the argument is unused.
 */
static void
ventiserver(void *v)
{
Packet *p;
VtReq *r;
char err[ERRMAX];
uint ms;
int cached, ok;
USED(v);
threadsetname("ventiserver");
trace(TraceWork, "start");
while((r = vtgetreq(ventisrv)) != nil){
trace(TraceWork, "finish");
trace(TraceWork, "start request %F", &r->tx);
trace(TraceRpc, "<- %F", &r->tx);
/* default reply type is request type + 1; vtrerror overrides it on failure */
r->rx.msgtype = r->tx.msgtype+1;
addstat(StatRpcTotal, 1);
// print("req (arenas[0]=%p sects[0]=%p) %F\n",
// mainindex->arenas[0], mainindex->sects[0], &r->tx);
switch(r->tx.msgtype){
default:
vtrerror(r, "unknown request");
break;
case VtTread:
/* time the lookup; the elapsed milliseconds feed the read statistics */
ms = msec();
r->rx.data = readlump(r->tx.score, r->tx.blocktype, r->tx.count, &cached);
ms = msec() - ms;
addstat2(StatRpcRead, 1, StatRpcReadTime, ms);
if(r->rx.data == nil){
addstat(StatRpcReadFail, 1);
rerrstr(err, sizeof err);
vtrerror(r, err);
}else{
addstat(StatRpcReadBytes, packetsize(r->rx.data));
addstat(StatRpcReadOk, 1);
/* cached was filled in by readlump */
if(cached)
addstat2(StatRpcReadCached, 1, StatRpcReadCachedTime, ms);
else
addstat2(StatRpcReadUncached, 1, StatRpcReadUncachedTime, ms);
}
break;
case VtTwrite:
/* detach the packet from the request before passing it on; presumably writelump takes ownership - confirm */
p = r->tx.data;
r->tx.data = nil;
addstat(StatRpcWriteBytes, packetsize(p));
ms = msec();
/* the score of the written block is stored straight into the reply */
ok = writelump(p, r->rx.score, r->tx.blocktype, 0, ms);
ms = msec() - ms;
addstat2(StatRpcWrite, 1, StatRpcWriteTime, ms);
if(ok < 0){
addstat(StatRpcWriteFail, 1);
rerrstr(err, sizeof err);
vtrerror(r, err);
}
break;
case VtTsync:
flushqueue();
flushdcache();
break;
}
trace(TraceRpc, "-> %F", &r->rx);
vtrespond(r);
trace(TraceWork, "start");
}
/* no more requests: flush caches and terminate every proc */
flushdcache();
flushicache();
threadexitsall(0);
}

View File

@ -0,0 +1,127 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/* incremented once per -v flag in threadmain; not read anywhere else in this file */
static int verbose;
/*
 * print the command-line synopsis and exit with the failure
 * status "usage" so that callers can detect the misuse.
 */
void
usage(void)
{
	fprint(2, "usage: verifyarena [-v]\n");
	threadexitsall("usage");
}
/*
 * fill buf with exactly n bytes from standard input, issuing as
 * many reads as necessary.  end of file or a read error is fatal.
 */
static void
readblock(uchar *buf, int n)
{
	int got, done;

	done = 0;
	while(done < n){
		got = read(0, &buf[done], n - done);
		if(got <= 0)
			sysfatal("can't read arena from standard input: %r");
		done += got;
	}
}
static void
verifyarena(void)
{
Arena arena;
ArenaHead head;
ZBlock *b;
DigestState s;
u64int n, e;
u32int bs;
u8int score[VtScoreSize];
fprint(2, "verify arena from standard input\n");
memset(&arena, 0, sizeof arena);
memset(&s, 0, sizeof s);
/*
* read the little bit, which will included the header
*/
bs = MaxIoSize;
b = alloczblock(bs, 0, 0);
readblock(b->data, HeadSize);
sha1(b->data, HeadSize, nil, &s);
if(unpackarenahead(&head, b->data) < 0)
sysfatal("corrupted arena header: %r");
if(head.version != ArenaVersion4 && head.version != ArenaVersion5)
fprint(2, "warning: unknown arena version %d\n", head.version);
/*
* now we know how much to read
* read everything but the last block, which is special
*/
e = head.size - head.blocksize;
for(n = HeadSize; n < e; n += bs){
if(n + bs > e)
bs = e - n;
readblock(b->data, bs);
sha1(b->data, bs, nil, &s);
}
/*
* read the last block update the sum.
* the sum is calculated assuming the slot for the sum is zero.
*/
bs = head.blocksize;
readblock(b->data, bs);
sha1(b->data, bs-VtScoreSize, nil, &s);
sha1(zeroscore, VtScoreSize, nil, &s);
sha1(nil, 0, score, &s);
/*
* validity check on the trailer
*/
arena.blocksize = head.blocksize;
if(unpackarena(&arena, b->data) < 0)
sysfatal("corrupted arena trailer: %r");
scorecp(arena.score, &b->data[arena.blocksize - VtScoreSize]);
if(namecmp(arena.name, head.name) != 0)
sysfatal("arena header and trailer names clash: %s vs. %s\n", head.name, arena.name);
if(arena.version != head.version)
sysfatal("arena header and trailer versions clash: %d vs. %d\n", head.version, arena.version);
arena.size = head.size - 2 * head.blocksize;
/*
* check for no checksum or the same
*/
if(scorecmp(score, arena.score) != 0){
if(scorecmp(zeroscore, arena.score) != 0)
fprint(2, "warning: mismatched checksums for arena=%s, found=%V calculated=%V",
arena.name, arena.score, score);
scorecp(arena.score, score);
}else
fprint(2, "matched score\n");
printarena(2, &arena);
}
void
threadmain(int argc, char *argv[])
{
ventifmtinstall();
statsinit();
ARGBEGIN{
case 'v':
verbose++;
break;
default:
usage();
break;
}ARGEND
readonly = 1;
if(argc != 0)
usage();
verifyarena();
threadexitsall(0);
}

331
src/cmd/venti/srv/whack.c Normal file
View File

@ -0,0 +1,331 @@
#include "stdinc.h"
#include "whack.h"
typedef struct Huff Huff;
/* when zero, whack() declines to compress and returns -1 */
int compressblocks = 1;
/*
 * parameters of the match length and offset encodings produced by whack
 */
enum
{
MaxFastLen = 9,
BigLenCode = 0x1f4, /* minimum code for large length encoding */
BigLenBits = 9,
BigLenBase = 4, /* starting items to encode for big lens */
MinOffBits = 6,
MaxOffBits = MinOffBits + 8,
MaxLen = 2051 /* max. length encodable in 24 bits */
};
/*
 * indices into the stats array that whack adds to when it succeeds;
 * MaxStat is the number of counters defined here
 */
enum
{
StatBytes,
StatOutBytes,
StatLits,
StatMatches,
StatLitBits,
StatOffBits,
StatLenBits,
MaxStat
};
/*
 * one entry of a code table: a bit pattern and its width
 */
struct Huff
{
short bits; /* length of the code */
ulong encode; /* the code */
};
/*
 * codes for short matches.  whack indexes this table with
 * (match length - MinMatch) when that value is below MaxFastLen.
 */
static Huff lentab[MaxFastLen] =
{
{2, 0x2}, /* 10 */
{3, 0x6}, /* 110 */
{5, 0x1c}, /* 11100 */
{5, 0x1d}, /* 11101 */
{6, 0x3c}, /* 111100 */
{7, 0x7a}, /* 1111010 */
{7, 0x7b}, /* 1111011 */
{8, 0xf8}, /* 11111000 */
{8, 0xf9}, /* 11111001 */
};
/*
 * search effort, set by whackinit from its level argument (range 2..1024).
 * whackmatch examines at most this many hash chain entries and stops
 * early once a match longer than this has been found.
 * this is a single file-wide value, not per Whack.
 */
static int thwmaxcheck;
/*
 * prepare tw for compression and set the file-wide search effort.
 * the effort is three quarters of 2^level, clamped to 2..1024.
 * tw is zeroed and its begin time set to 2*WhackMaxOff.
 */
void
whackinit(Whack *tw, int level)
{
	int effort;

	effort = 1 << level;
	effort -= effort >> 2;
	if(effort < 2)
		effort = 2;
	else if(effort > 1024)
		effort = 1024;
	thwmaxcheck = effort;

	memset(tw, 0, sizeof *tw);
	tw->begin = 2 * WhackMaxOff;
}
/*
 * find a string in the dictionary
 *
 * walks the hash chain for h, most recently inserted position first,
 * comparing the bytes at *ss with earlier bytes of the same buffer.
 * on return *ss has been advanced by the length of the best match
 * (not at all when there is none) and the result is that match's
 * backward offset, or 0 when nothing matched.
 * returns -1, leaving *ss alone, when fewer than MinMatch bytes remain.
 */
static int
whackmatch(Whack *b, uchar **ss, uchar *esrc, ulong h, ulong now)
{
ushort then, off, last;
int bestoff, bestlen, check;
uchar *s, *t;
s = *ss;
if(esrc < s + MinMatch)
return -1;
/* never compare more than MaxLen bytes */
if(s + MaxLen < esrc)
esrc = s + MaxLen;
bestoff = 0;
bestlen = 0;
/* check limits how many chain entries are examined */
check = thwmaxcheck;
last = 0;
for(then = b->hash[h]; check-- > 0; then = b->next[then & (WhackMaxOff - 1)]){
/* distance back to the candidate, truncated to 16 bits */
off = now - then;
/* stop when the chain no longer moves further back or leaves the window */
if(off <= last || off > WhackMaxOff)
break;
/*
 * don't need to check for the end because
 * 1) s too close check above
 */
t = s - off;
if(s[0] == t[0] && s[1] == t[1] && s[2] == t[2]){
/* only extend a candidate that can beat the best so far: its byte at bestlen must match too */
if(!bestlen || esrc - s > bestlen && s[bestlen] == t[bestlen]){
t += 3;
for(s += 3; s < esrc; s++){
if(*s != *t)
break;
t++;
}
if(s - *ss > bestlen){
bestlen = s - *ss;
bestoff = off;
/* long enough; stop searching */
if(bestlen > thwmaxcheck)
break;
}
}
}
/* rewind s for the next candidate */
s = *ss;
last = off;
}
*ss += bestlen;
return bestoff;
}
/*
 * knuth vol. 3 multiplicative hashing
 * each byte x chosen according to rules
 * 1/4 < x < 3/10, 1/3 < x < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
 * with reasonable spread between the bytes & their complements
 *
 * the 3 byte value appears to be almost as good as the 4 byte value,
 * and might be faster on some machines
 */
/*
#define hashit(c) ((((ulong)(c) * 0x6b43a9) >> (24 - HashLog)) & HashMask)
*/
/* hash the low 24 bits of c to an index in the range 0..HashMask */
#define hashit(c) (((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
/*
 * lz77 compression with single lookup in a hash table for each block
 *
 * compresses the n bytes at src into dst, which is treated as having
 * room for n bytes (wdmax = dst + n).
 * returns the number of bytes written to dst, or -1 when compression
 * is switched off (compressblocks == 0), n < MinMatch, the output would
 * not fit in n bytes, or the progress check past the halfway point
 * finds that too much of the input was emitted as literals.
 * on success the counters in stats are added to, not assigned, so the
 * caller must pass initialized values.
 */
int
whack(Whack *w, uchar *dst, uchar *src, int n, ulong stats[WhackStats])
{
uchar *s, *ss, *sss, *esrc, *half, *wdst, *wdmax;
ulong cont, code, wbits;
ushort now;
int toff, lithist, h, len, bits, use, wnbits, lits, matches, offbits, lenbits;
if(!compressblocks || n < MinMatch)
return -1;
wdst = dst;
wdmax = dst + n;
now = w->begin;
s = src;
w->data = s;
/* cont carries the bytes being hashed; hashit uses only its low 24 bits */
cont = (s[0] << 16) | (s[1] << 8) | s[2];
esrc = s + n;
half = s + (n >> 1);
/* wbits is the output bit accumulator, wnbits the number of bits pending in it */
wnbits = 0;
wbits = 0;
lits = 0;
matches = 0;
offbits = 0;
lenbits = 0;
/* one bit per recent literal, newest in bit 0, set when the byte was outside 32..127 */
lithist = ~0;
while(s < esrc){
h = hashit(cont);
sss = s;
toff = whackmatch(w, &sss, esrc, h, now);
ss = sss;
len = ss - s;
/* flush complete bytes; give up if dst is already full */
for(; wnbits >= 8; wnbits -= 8){
if(wdst >= wdmax){
w->begin = now;
return -1;
}
*wdst++ = wbits >> (wnbits - 8);
}
/* no usable match: emit *s as a literal */
if(len < MinMatch){
toff = *s;
lithist = (lithist << 1) | toff < 32 | toff > 127;
/*
 * the literal's width depends on the history:
 * 9 bits if any of history bits 1-4 is set,
 * 10 or 11 bits if only bit 0 (this byte) is set,
 * 8 bits otherwise.
 */
if(lithist & 0x1e){
wbits = (wbits << 9) | toff;
wnbits += 9;
}else if(lithist & 1){
toff = (toff + 64) & 0xff;
if(toff < 96){
wbits = (wbits << 10) | toff;
wnbits += 10;
}else{
wbits = (wbits << 11) | toff;
wnbits += 11;
}
}else{
wbits = (wbits << 8) | toff;
wnbits += 8;
}
lits++;
/*
 * speed hack
 * check for compression progress, bail if none achieved
 */
if(s > half){
if(4 * (s - src) < 5 * lits){
w->begin = now;
return -1;
}
/* the check is made only once: half can no longer be exceeded */
half = esrc;
}
/* enter this position into the hash chains and slide the hash window one byte */
if(s + MinMatch <= esrc){
w->next[now & (WhackMaxOff - 1)] = w->hash[h];
w->hash[h] = now;
if(s + MinMatch < esrc)
cont = (cont << 8) | s[MinMatch];
}
now++;
s++;
continue;
}
matches++;
/*
 * length of match
 */
if(len > MaxLen){
len = MaxLen;
ss = s + len;
}
len -= MinMatch;
if(len < MaxFastLen){
/* short match: fixed code from lentab */
bits = lentab[len].bits;
wbits = (wbits << bits) | lentab[len].encode;
wnbits += bits;
lenbits += bits;
}else{
/* long match: the code grows by one bit per step while the range covered widens */
code = BigLenCode;
bits = BigLenBits;
use = BigLenBase;
len -= MaxFastLen;
while(len >= use){
len -= use;
code = (code + use) << 1;
use <<= (bits & 1) ^ 1;
bits++;
}
wbits = (wbits << bits) | (code + len);
wnbits += bits;
lenbits += bits;
/* flush here so the accumulator has room for the offset that follows */
for(; wnbits >= 8; wnbits -= 8){
if(wdst >= wdmax){
w->begin = now;
return -1;
}
*wdst++ = wbits >> (wnbits - 8);
}
}
/*
 * offset in history
 */
toff--;
/* bits = number of bits needed for toff, at least MinOffBits */
for(bits = MinOffBits; toff >= (1 << bits); bits++)
;
/* a 3-bit (or, for the largest offsets, 4-bit) size prefix followed by the offset bits */
if(bits < MaxOffBits-1){
wbits = (wbits << 3) | (bits - MinOffBits);
if(bits != MinOffBits)
bits--;
wnbits += bits + 3;
offbits += bits + 3;
}else{
wbits = (wbits << 4) | 0xe | (bits - (MaxOffBits-1));
bits--;
wnbits += bits + 4;
offbits += bits + 4;
}
wbits = (wbits << bits) | toff & ((1 << bits) - 1);
/* enter every position covered by the match into the hash chains */
for(; s != ss; s++){
if(s + MinMatch <= esrc){
h = hashit(cont);
w->next[now & (WhackMaxOff - 1)] = w->hash[h];
w->hash[h] = now;
if(s + MinMatch < esrc)
cont = (cont << 8) | s[MinMatch];
}
now++;
}
}
w->begin = now;
stats[StatBytes] += esrc - src;
stats[StatLits] += lits;
stats[StatMatches] += matches;
/* NOTE(review): nothing visible here explains the "dst + 2" in the literal bit count - confirm */
stats[StatLitBits] += (wdst - (dst + 2)) * 8 + wnbits - offbits - lenbits;
stats[StatOffBits] += offbits;
stats[StatLenBits] += lenbits;
/* pad the final partial byte with zero bits and flush what is left */
if(wnbits & 7){
wbits <<= 8 - (wnbits & 7);
wnbits += 8 - (wnbits & 7);
}
for(; wnbits >= 8; wnbits -= 8){
if(wdst >= wdmax)
return -1;
*wdst++ = wbits >> (wnbits - 8);
}
stats[StatOutBytes] += wdst - dst;
return wdst - dst;
}
/*
 * compress the ssize bytes at src into dst with a fresh compressor
 * state at level 6.  returns whatever whack returns: the compressed
 * size, or -1 if the block was not compressed.
 * the statistics gathered by whack are discarded.
 */
int
whackblock(uchar *dst, uchar *src, int ssize)
{
	Whack w;
	ulong stats[WhackStats];
	int r;

	/* whack adds to these counters, so they must start out defined */
	memset(stats, 0, sizeof stats);
	whackinit(&w, 6);
	r = whack(&w, dst, src, ssize, stats);
	return r;
}

40
src/cmd/venti/srv/whack.h Normal file
View File

@ -0,0 +1,40 @@
typedef struct Whack Whack;
typedef struct Unwhack Unwhack;
/*
 * sizes shared by the compressor and decompressor
 */
enum
{
WhackStats = 8, /* length of the stats array in whack's prototype */
WhackErrLen = 64, /* max length of error message from whack or unwhack */
WhackMaxOff = 16*1024, /* max allowed offset */
HashLog = 14,
HashSize = 1<<HashLog,
HashMask = HashSize - 1,
MinMatch = 3, /* shortest match possible */
MinDecode = 8, /* minimum bits to decode a match or lit; >= 8 */
MaxSeqMask = 8, /* number of bits in coding block mask */
MaxSeqStart = 256 /* max offset of initial coding block */
};
/*
 * compressor state, initialized by whackinit.
 * positions ("times") are 16-bit counters that advance by one per input byte.
 */
struct Whack
{
ushort begin; /* time of first byte in hash */
ushort hash[HashSize]; /* most recent time entered for each hash value */
ushort next[WhackMaxOff]; /* indexed by time mod WhackMaxOff: the previous time with the same hash */
uchar *data; /* set to the source buffer at the start of whack */
};
/*
 * decompressor state: only holds the text of the last error
 */
struct Unwhack
{
char err[WhackErrLen]; /* filled in with snprint by unwhack before it returns -1 */
};
/* zero the state and set the search effort from level */
void whackinit(Whack*, int level);
/* NOTE(review): definition not visible here; presumably prepares an Unwhack for use - confirm */
void unwhackinit(Unwhack*);
/* compress nsrc bytes; returns the compressed size or -1 if the block was not compressed */
int whack(Whack*, uchar *dst, uchar *src, int nsrc, ulong stats[WhackStats]);
/* decompress; returns the decompressed length or -1 with the reason left in the Unwhack's err */
int unwhack(Unwhack*, uchar *dst, int ndst, uchar *src, int nsrc);
/* one-shot whackinit + whack with a private state */
int whackblock(uchar *dst, uchar *src, int ssize);

217
src/cmd/venti/srv/wrarena.c Normal file
View File

@ -0,0 +1,217 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/* locked by threadmain at startup and never unlocked; finished send threads block on it (see vtsendthread) */
QLock godot;
/* venti server named by -h; nil or "/dev/null" means no server is dialed */
char *host;
int readonly = 1; /* for part.c */
int mainstacksize = 256*1024;
/* carries ZClump records from rdarena to the vtsendthread procs */
Channel *c;
/* connection to the venti server, or nil when none was dialed */
VtConn *z;
int fast; /* and a bit unsafe; only for benchmarking */
/* set by -o; makes rdarena print the offset at which it stopped */
int haveaoffset;
/* set by -M: rdarena stops after handling this many clumps; -1 means no limit, 0 means read none */
int maxwrites = -1;
/*
 * unit of work sent from rdarena to the send threads.
 * a record whose lump is nil tells the receiving thread to stop.
 */
typedef struct ZClump ZClump;
struct ZClump
{
/* the loaded clump data; freed by the send thread after writing */
ZBlock *lump;
/* clump header; its info supplies score, type and uncompressed size for vtwrite */
Clump cl;
/* offset of the clump as used by rdarena, reported in error messages */
u64int aa;
};
/*
 * report how wrarena is invoked and exit with status "usage".
 */
void
usage(void)
{
	char *status;

	status = "usage";
	fprint(2, "usage: wrarena [-h host] arenafile [offset]\n");
	threadexitsall(status);
}
/*
 * worker proc: take clumps from channel c and write each to the venti
 * server z, freeing the clump data afterwards.  a failed write is
 * fatal.  the loop ends when recv fails or a record with a nil lump
 * arrives.  the argument is unused.
 */
void
vtsendthread(void *v)
{
	ZClump job;

	USED(v);
	for(;;){
		if(recv(c, &job) != 1)
			break;
		if(job.lump == nil)
			break;
		if(vtwrite(z, job.cl.info.score, job.cl.info.type, job.lump->data, job.cl.info.uncsize) < 0)
			sysfatal("failed writing clump %llud: %r", job.aa);
		freezblock(job.lump);
	}

	/*
	 * Do not return: every send thread would otherwise be exiting
	 * just as threadmain calls threadexitsall, and that combination
	 * ends in a seg fault inside exit (libthread or the Linux NPTL
	 * pthreads library is at fault; NPTL is suspected but this was
	 * never confirmed, despite a day of looking).  As a workaround,
	 * park here on a lock that threadmain holds forever and let
	 * threadexitsall take the whole program down.
	 */
	qlock(&godot);
}
/*
 * walk the clumps of arena starting at offset (or at 0 when offset is
 * ~0, meaning "not given") and hand each one to the send threads over
 * channel c.  unless running in fast mode, each clump's score is
 * recomputed and its type checked first.
 * the walk stops at the first free or unrecognized clump magic, at the
 * first clump that fails to load or verify, or after maxwrites clumps.
 * with -o the offset at which the walk stopped is printed.
 */
static void
rdarena(Arena *arena, u64int offset)
{
	u64int a, aa, e;
	u32int magic;
	Clump cl;
	uchar score[VtScoreSize];
	ZBlock *lump;
	ZClump zcl;

	fprint(2, "wrarena: copying %s to venti\n", arena->name);
	printarena(2, arena);

	a = arena->base;
	e = arena->base + arena->size;
	if(offset != ~(u64int)0) {
		if(offset >= e-a)
			sysfatal("bad offset %llud >= %llud\n",
				offset, e-a);
		aa = offset;
	} else
		aa = 0;

	/*
	 * NOTE(review): aa is checked against e-a above but against e in
	 * the loop condition; the loop normally ends on the clump magic
	 * instead - confirm which bound is intended.
	 */
	if(maxwrites != 0)
	for(; aa < e; aa += ClumpSize+cl.info.size) {
		magic = clumpmagic(arena, aa);
		if(magic == ClumpFreeMagic)
			break;
		if(magic != arena->clumpmagic) {
			/* unrecognized magic: treated as the end of the data, silently */
			break;
		}
		lump = loadclump(arena, aa, 0, &cl, score, 0);
		if(lump == nil) {
			fprint(2, "clump %llud failed to read: %r\n", aa);
			break;
		}
		if(!fast && cl.info.type != VtCorruptType) {
			scoremem(score, lump->data, cl.info.uncsize);
			if(scorecmp(cl.info.score, score) != 0) {
				fprint(2, "clump %llud has mismatched score\n", aa);
				break;
			}
			/* vttypevalid is a boolean: zero means the type is out of range */
			if(!vttypevalid(cl.info.type)) {
				fprint(2, "clump %llud has bad type %d\n", aa, cl.info.type);
				break;
			}
		}
		if(z && cl.info.type != VtCorruptType){
			/* the receiving send thread frees the lump */
			zcl.cl = cl;
			zcl.lump = lump;
			zcl.aa = aa;
			send(c, &zcl);
		}else
			freezblock(lump);
		if(maxwrites>0 && --maxwrites == 0)
			break;
	}
	if(haveaoffset)
		print("end offset %llud\n", aa);
}
/*
 * wrarena entry point.
 * opens the arena stored in a file (at byte offset -o within it),
 * optionally connects to a venti server (-h), starts twelve send
 * threads and copies the arena's clumps to the server via rdarena.
 * the optional second argument is the clump offset to start from.
 */
void
threadmain(int argc, char *argv[])
{
	int i;
	char *file;
	Arena *arena;
	u64int offset, aoffset;
	Part *part;
	Dir *d;
	uchar buf[8192];
	ArenaHead head;
	ZClump zerocl;

	/* held for the life of the program; see vtsendthread */
	qlock(&godot);
	aoffset = 0;
	ARGBEGIN{
	case 'f':
		fast = 1;
		ventidoublechecksha1 = 0;
		break;
	case 'h':
		host = EARGF(usage());
		break;
	case 'o':
		haveaoffset = 1;
		aoffset = strtoull(EARGF(usage()), 0, 0);
		break;
	case 'M':
		maxwrites = atoi(EARGF(usage()));
		break;
	default:
		usage();
		break;
	}ARGEND

	/* ~0 tells rdarena that no starting offset was given */
	offset = ~(u64int)0;
	switch(argc) {
	default:
		usage();
	case 2:
		offset = strtoull(argv[1], 0, 0);
		/* fall through */
	case 1:
		file = argv[0];
	}

	fmtinstall('V', vtscorefmt);

	statsinit();

	if((d = dirstat(file)) == nil)
		sysfatal("can't stat file %s: %r", file);

	part = initpart(file, OREAD);
	if(part == nil)
		sysfatal("can't open file %s: %r", file);
	if(readpart(part, aoffset, buf, sizeof buf) < 0)
		sysfatal("can't read file %s: %r", file);

	if(unpackarenahead(&head, buf) < 0)
		sysfatal("corrupted arena header: %r");

	if(aoffset+head.size > d->length)
		sysfatal("arena is truncated: want %llud bytes have %llud\n",
			head.size, d->length);
	/* the stat result was only needed for the length check */
	free(d);

	partblocksize(part, head.blocksize);
	initdcache(8 * MaxDiskBlock);

	arena = initarena(part, aoffset, head.size, head.blocksize);
	if(arena == nil)
		sysfatal("initarena: %r");

	if(host && strcmp(host, "/dev/null") != 0){
		z = vtdial(host);
		if(z == nil)
			sysfatal("could not connect to server: %r");
		if(vtconnect(z) < 0)
			sysfatal("vtconnect: %r");
	}else
		z = nil;

	c = chancreate(sizeof(ZClump), 0);
	for(i=0; i<12; i++)
		vtproc(vtsendthread, nil);

	rdarena(arena, offset);
	/* there is nothing to sync when no server was dialed */
	if(z && vtsync(z) < 0)
		sysfatal("executing sync: %r");

	/* a zeroed record (nil lump) tells one send thread to stop; send one per thread */
	memset(&zerocl, 0, sizeof zerocl);
	for(i=0; i<12; i++)
		send(c, &zerocl);
	if(z){
		vthangup(z);
	}
	threadexitsall(0);
}

68
src/cmd/venti/srv/xml.c Normal file
View File

@ -0,0 +1,68 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
#include "xml.h"
/*
 * write arena s to hout as one self-closing element named tag,
 * indented by indent, with the arena's properties as attributes.
 */
void
xmlarena(Hio *hout, Arena *s, char *tag, int indent)
{
	u64int stop, compressed, storage;

	stop = s->base+2*s->blocksize;
	compressed = s->memstats.used - s->memstats.clumps * ClumpSize;
	storage = s->memstats.used + s->memstats.clumps * ClumpInfoSize;

	xmlindent(hout, indent);
	hprint(hout, "<%s", tag);

	/* identity and layout */
	xmlaname(hout, s->name, "name");
	xmlu32int(hout, s->version, "version");
	xmlaname(hout, s->part->name, "partition");
	xmlu32int(hout, s->blocksize, "blocksize");
	xmlu64int(hout, s->base, "start");
	xmlu64int(hout, stop, "stop");

	/* times and state */
	xmlu32int(hout, s->ctime, "created");
	xmlu32int(hout, s->wtime, "modified");
	xmlsealed(hout, s->memstats.sealed, "sealed");
	xmlscore(hout, s->score, "score");

	/* usage */
	xmlu32int(hout, s->memstats.clumps, "clumps");
	xmlu32int(hout, s->memstats.cclumps, "compressedclumps");
	xmlu64int(hout, s->memstats.uncsize, "data");
	xmlu64int(hout, compressed, "compresseddata");
	xmlu64int(hout, storage, "storage");

	hprint(hout, "/>\n");
}
/*
 * write index s to hout as an element named tag at the given indent.
 * the index's own properties become attributes; its sections, arena
 * map entries and arenas are written as the child lists <sects>,
 * <amaps> and <arenas>, one level deeper.
 */
void
xmlindex(Hio *hout, Index *s, char *tag, int indent)
{
	int k, inner, leaf;

	inner = indent + 1;
	leaf = indent + 2;

	xmlindent(hout, indent);
	hprint(hout, "<%s", tag);
	xmlaname(hout, s->name, "name");
	xmlu32int(hout, s->version, "version");
	xmlu32int(hout, s->blocksize, "blocksize");
	xmlu32int(hout, s->tabsize, "tabsize");
	xmlu32int(hout, s->buckets, "buckets");
	xmlu32int(hout, s->div, "buckdiv");
	hprint(hout, ">\n");

	xmlindent(hout, inner);
	hprint(hout, "<sects>\n");
	for(k = 0; k < s->nsects; k++)
		xmlamap(hout, &s->smap[k], "sect", leaf);
	xmlindent(hout, inner);
	hprint(hout, "</sects>\n");

	xmlindent(hout, inner);
	hprint(hout, "<amaps>\n");
	for(k = 0; k < s->narenas; k++)
		xmlamap(hout, &s->amap[k], "amap", leaf);
	xmlindent(hout, inner);
	hprint(hout, "</amaps>\n");

	xmlindent(hout, inner);
	hprint(hout, "<arenas>\n");
	for(k = 0; k < s->narenas; k++)
		xmlarena(hout, s->arenas[k], "arena", leaf);
	xmlindent(hout, inner);
	hprint(hout, "</arenas>\n");

	xmlindent(hout, indent);
	hprint(hout, "</%s>\n", tag);
}
/*
 * write map entry s to hout as one self-closing element named tag,
 * indented by indent, with name, start and stop attributes.
 */
void
xmlamap(Hio *hout, AMap *s, char *tag, int indent)
{
	xmlindent(hout, indent);
	hprint(hout, "<%s", tag);

	xmlaname(hout, s->name, "name");
	xmlu64int(hout, s->start, "start");
	xmlu64int(hout, s->stop, "stop");

	hprint(hout, "/>\n");
}

11
src/cmd/venti/srv/xml.h Normal file
View File

@ -0,0 +1,11 @@
/*
 * writers for whole structures: each emits one element named tag
 * on hout, starting at the given indent
 */
void xmlamap(Hio *hout, AMap *v, char *tag, int indent);
void xmlarena(Hio *hout, Arena *v, char *tag, int indent);
void xmlindex(Hio *hout, Index *v, char *tag, int indent);
/*
 * writers for single values, called between "<tag" and the closing
 * ">" by the functions above.
 * NOTE(review): their definitions are not visible here; presumably each
 * emits v as an attribute named tag - confirm
 */
void xmlaname(Hio *hout, char *v, char *tag);
void xmlscore(Hio *hout, u8int *v, char *tag);
void xmlsealed(Hio *hout, int v, char *tag);
void xmlu32int(Hio *hout, u32int v, char *tag);
void xmlu64int(Hio *hout, u64int v, char *tag);
/* called at the start of each output line with the nesting depth */
void xmlindent(Hio *hout, int indent);

View File

@ -0,0 +1,93 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/*
 * set up f so that formatted output is written into the data area
 * of b: output starts at b->data and stops after b->len bytes.
 * no flush routine is installed.
 */
void
fmtzbinit(Fmt *f, ZBlock *b)
{
	/* bookkeeping */
	f->runes = 0;
	f->nfmt = 0;
	f->flush = nil;
	f->farg = nil;

	/* output window */
	f->start = b->data;
	f->to = f->start;
	f->stop = (char*)f->start + b->len;
}
/* round pointer p up to the next multiple of n; n must be a power of two */
#define ROUNDUP(p, n) ((void*)(((ulong)(p)+(n)-1)&~(ulong)((n)-1)))
/*
 * guard pattern stored after the data of every ZBlock and checked when
 * the block is freed.  alloczblock and freezblock copy and compare 32
 * bytes of it, so the array is declared with that size; the bytes after
 * the text are zero.
 */
static char zmagic[32] = "1234567890abcdefghijkl";
/*
 * allocate a ZBlock with size bytes of data aligned to blocksize
 * (32 when blocksize is 0).  the data is cleared if zeroed is set.
 * everything lives in one malloc'ed region: the aligned data, 32 guard
 * bytes copied from zmagic, then the 8-byte-aligned ZBlock header.
 * returns nil, with the error string set, when out of memory.
 */
ZBlock *
alloczblock(u32int size, int zeroed, uint blocksize)
{
	enum { Guard = 32 };	/* XXX */
	static ZBlock blank;
	uchar *raw, *payload;
	ZBlock *zb;
	int total;

	if(blocksize == 0)
		blocksize = 32;	/* try for cache line alignment */

	total = size+Guard+sizeof(ZBlock)+blocksize+8;
	raw = malloc(total);
	if(raw == nil){
		seterr(EOk, "out of memory");
		return nil;
	}

	payload = ROUNDUP(raw, blocksize);
	zb = ROUNDUP(payload+size+Guard, 8);
	if(0) fprint(2, "alloc %p-%p data %p-%p b %p-%p\n",
		raw, raw+total, payload, payload+size, zb, zb+1);

	*zb = blank;
	zb->free = raw;
	zb->data = payload;
	zb->len = size;
	zb->_size = size;
	if(zeroed)
		memset(zb->data, 0, size);
	memmove(zb->data+size, zmagic, Guard);
	return zb;
}
/*
 * release a block obtained from alloczblock; nil is ignored.
 * aborts if the 32 guard bytes after the data no longer match zmagic.
 * the guard is cleared before the memory is freed.
 */
void
freezblock(ZBlock *b)
{
	if(b == nil)
		return;
	if(memcmp(b->data+b->_size, zmagic, 32) != 0)
		abort();
	memset(b->data+b->_size, 0, 32);
	free(b->free);
}
/*
 * copy the first size bytes of packet p into a newly allocated ZBlock.
 * returns nil if p is nil, if allocation fails, or if the copy fails
 * (in which case the new block is freed).
 */
ZBlock*
packet2zblock(Packet *p, u32int size)
{
	ZBlock *zb;

	if(p == nil)
		return nil;
	zb = alloczblock(size, 0, 0);
	if(zb != nil && packetcopy(p, zb->data, 0, size) < 0){
		freezblock(zb);
		zb = nil;
	}
	return zb;
}
/*
 * build a new packet holding the first size bytes of zb's data.
 * returns nil if zb is nil.
 */
Packet*
zblock2packet(ZBlock *zb, u32int size)
{
	Packet *pkt;

	if(zb != nil){
		pkt = packetalloc();
		packetappend(pkt, zb->data, size);
		return pkt;
	}
	return nil;
}

View File

@ -0,0 +1,31 @@
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/*
 * overwrite part with zeros from PartBlank to the end of the partition.
 * most of it is written MaxIoSize bytes at a time; the remainder is
 * written in blocksize pieces.  any tail shorter than blocksize is
 * left untouched.  allocation or write failure is fatal.
 */
void
zeropart(Part *part, int blocksize)
{
	ZBlock *b;
	u64int addr;
	int w;

	fprint(2, "clearing the partition\n");

	b = alloczblock(MaxIoSize, 1, blocksize);
	if(b == nil)
		sysfatal("can't initialize %s: %r", part->name);

	/* w counts the big writes completed, for the error message */
	w = 0;
	for(addr = PartBlank; addr + MaxIoSize <= part->size; addr += MaxIoSize){
		if(writepart(part, addr, b->data, MaxIoSize) < 0)
			sysfatal("can't initialize %s, writing block %d failed: %r", part->name, w);
		w++;
	}

	for(; addr + blocksize <= part->size; addr += blocksize)
		if(writepart(part, addr, b->data, blocksize) < 0)
			sysfatal("can't initialize %s: %r", part->name);

	freezblock(b);
}

54
src/cmd/venti/sync.c Normal file
View File

@ -0,0 +1,54 @@
#include <u.h>
#include <libc.h>
#include <thread.h>
#include <venti.h>
/* server to dial, set by -h; left nil otherwise and passed to vtdial as is */
char *host;
/* set by -x: connect and hang up without issuing the sync */
int donothing;
/*
 * report how sync is invoked and exit with status "usage".
 */
void
usage(void)
{
	char *status;

	status = "usage";
	fprint(2, "usage: sync [-h host]\n");
	threadexitsall(status);
}
/*
 * sync entry point: connect to the venti server (-h host), ask it
 * to sync unless -x was given, then hang up.
 * any failure along the way is fatal.
 */
void
threadmain(int argc, char *argv[])
{
	VtConn *conn;

	fmtinstall('V', vtscorefmt);
	fmtinstall('F', vtfcallfmt);

	ARGBEGIN{
	case 'x':
		donothing = 1;
		break;
	case 'h':
		host = EARGF(usage());
		if(host == nil)
			usage();
		break;
	default:
		usage();
		break;
	}ARGEND

	if(argc != 0)
		usage();

	conn = vtdial(host);
	if(conn == nil)
		sysfatal("could not connect to server: %r");
	if(vtconnect(conn) < 0)
		sysfatal("vtconnect: %r");

	if(!donothing && vtsync(conn) < 0)
		sysfatal("vtsync: %r");

	vthangup(conn);
	threadexitsall(0);
}

62
src/cmd/venti/write.c Normal file
View File

@ -0,0 +1,62 @@
#include <u.h>
#include <libc.h>
#include <venti.h>
#include <libsec.h>
#include <thread.h>
/*
 * report how write is invoked and exit with status "usage".
 */
void
usage(void)
{
	char *status;

	status = "usage";
	fprint(2, "usage: write [-z] [-h host] [-t type] <datablock\n");
	threadexitsall(status);
}
void
threadmain(int argc, char *argv[])
{
char *host;
int dotrunc, n, type;
uchar *p, score[VtScoreSize];
VtConn *z;
fmtinstall('F', vtfcallfmt);
fmtinstall('V', vtscorefmt);
host = nil;
dotrunc = 0;
type = VtDataType;
ARGBEGIN{
case 'z':
dotrunc = 1;
break;
case 'h':
host = EARGF(usage());
break;
case 't':
type = atoi(EARGF(usage()));
break;
default:
usage();
break;
}ARGEND
if(argc != 0)
usage();
p = vtmallocz(VtMaxLumpSize+1);
n = readn(0, p, VtMaxLumpSize+1);
if(n > VtMaxLumpSize)
sysfatal("input too big: max block size is %d", VtMaxLumpSize);
z = vtdial(host);
if(z == nil)
sysfatal("could not connect to server: %r");
if(vtconnect(z) < 0)
sysfatal("vtconnect: %r");
if(dotrunc)
n = vtzerotruncate(type, p, n);
if(vtwrite(z, score, type, p, n) < 0)
sysfatal("vtwrite: %r");
vthangup(z);
print("%V\n", score);
threadexitsall(0);
}