931 lines
20 KiB
C
931 lines
20 KiB
C
#include "stdinc.h"
|
|
#include "dat.h"
|
|
#include "fns.h"
|
|
|
|
typedef struct ASum ASum;
|
|
|
|
struct ASum
|
|
{
|
|
Arena *arena;
|
|
ASum *next;
|
|
};
|
|
|
|
static void sealarena(Arena *arena);
|
|
static int okarena(Arena *arena);
|
|
static int loadarena(Arena *arena);
|
|
static CIBlock *getcib(Arena *arena, int clump, int writing, CIBlock *rock);
|
|
static void putcib(Arena *arena, CIBlock *cib);
|
|
static void sumproc(void *);
|
|
static void loadcig(Arena *arena);
|
|
|
|
static QLock sumlock;
|
|
static Rendez sumwait;
|
|
static ASum *sumq;
|
|
static ASum *sumqtail;
|
|
static uchar zero[8192];
|
|
|
|
int arenasumsleeptime;
|
|
|
|
int
|
|
initarenasum(void)
|
|
{
|
|
needzeroscore(); /* OS X */
|
|
|
|
qlock(&sumlock);
|
|
sumwait.l = &sumlock;
|
|
qunlock(&sumlock);
|
|
|
|
if(vtproc(sumproc, nil) < 0){
|
|
seterr(EOk, "can't start arena checksum slave: %r");
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* make an Arena, and initialize it based upon the disk header and trailer.
|
|
*/
|
|
Arena*
|
|
initarena(Part *part, u64int base, u64int size, u32int blocksize)
|
|
{
|
|
Arena *arena;
|
|
|
|
arena = MKZ(Arena);
|
|
arena->part = part;
|
|
arena->blocksize = blocksize;
|
|
arena->clumpmax = arena->blocksize / ClumpInfoSize;
|
|
arena->base = base + blocksize;
|
|
arena->size = size - 2 * blocksize;
|
|
|
|
if(loadarena(arena) < 0){
|
|
seterr(ECorrupt, "arena header or trailer corrupted");
|
|
freearena(arena);
|
|
return nil;
|
|
}
|
|
if(okarena(arena) < 0){
|
|
freearena(arena);
|
|
return nil;
|
|
}
|
|
|
|
if(arena->diskstats.sealed && scorecmp(zeroscore, arena->score)==0)
|
|
sealarena(arena);
|
|
|
|
return arena;
|
|
}
|
|
|
|
void
|
|
freearena(Arena *arena)
|
|
{
|
|
if(arena == nil)
|
|
return;
|
|
free(arena);
|
|
}
|
|
|
|
Arena*
|
|
newarena(Part *part, u32int vers, char *name, u64int base, u64int size, u32int blocksize)
|
|
{
|
|
int bsize;
|
|
Arena *arena;
|
|
|
|
if(nameok(name) < 0){
|
|
seterr(EOk, "illegal arena name", name);
|
|
return nil;
|
|
}
|
|
arena = MKZ(Arena);
|
|
arena->part = part;
|
|
arena->version = vers;
|
|
if(vers == ArenaVersion4)
|
|
arena->clumpmagic = _ClumpMagic;
|
|
else{
|
|
do
|
|
arena->clumpmagic = fastrand();
|
|
while(arena->clumpmagic==_ClumpMagic || arena->clumpmagic==0);
|
|
}
|
|
arena->blocksize = blocksize;
|
|
arena->clumpmax = arena->blocksize / ClumpInfoSize;
|
|
arena->base = base + blocksize;
|
|
arena->size = size - 2 * blocksize;
|
|
|
|
namecp(arena->name, name);
|
|
|
|
bsize = sizeof zero;
|
|
if(bsize > arena->blocksize)
|
|
bsize = arena->blocksize;
|
|
|
|
if(wbarena(arena)<0 || wbarenahead(arena)<0
|
|
|| writepart(arena->part, arena->base, zero, bsize)<0){
|
|
freearena(arena);
|
|
return nil;
|
|
}
|
|
|
|
return arena;
|
|
}
|
|
|
|
int
|
|
readclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
|
|
{
|
|
CIBlock *cib, r;
|
|
|
|
cib = getcib(arena, clump, 0, &r);
|
|
if(cib == nil)
|
|
return -1;
|
|
unpackclumpinfo(ci, &cib->data->data[cib->offset]);
|
|
putcib(arena, cib);
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n)
|
|
{
|
|
CIBlock *cib, r;
|
|
int i;
|
|
|
|
/*
|
|
* because the clump blocks are laid out
|
|
* in reverse order at the end of the arena,
|
|
* it can be a few percent faster to read
|
|
* the clumps backwards, which reads the
|
|
* disk blocks forwards.
|
|
*/
|
|
for(i = n-1; i >= 0; i--){
|
|
cib = getcib(arena, clump + i, 0, &r);
|
|
if(cib == nil){
|
|
n = i;
|
|
continue;
|
|
}
|
|
unpackclumpinfo(&cis[i], &cib->data->data[cib->offset]);
|
|
putcib(arena, cib);
|
|
}
|
|
return n;
|
|
}
|
|
|
|
/*
|
|
* write directory information for one clump
|
|
* must be called the arena locked
|
|
*/
|
|
int
|
|
writeclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
|
|
{
|
|
CIBlock *cib, r;
|
|
|
|
cib = getcib(arena, clump, 1, &r);
|
|
if(cib == nil)
|
|
return -1;
|
|
dirtydblock(cib->data, DirtyArenaCib);
|
|
packclumpinfo(ci, &cib->data->data[cib->offset]);
|
|
putcib(arena, cib);
|
|
return 0;
|
|
}
|
|
|
|
u64int
|
|
arenadirsize(Arena *arena, u32int clumps)
|
|
{
|
|
return ((clumps / arena->clumpmax) + 1) * arena->blocksize;
|
|
}
|
|
|
|
/*
|
|
* read a clump of data
|
|
* n is a hint of the size of the data, not including the header
|
|
* make sure it won't run off the end, then return the number of bytes actually read
|
|
*/
|
|
u32int
|
|
readarena(Arena *arena, u64int aa, u8int *buf, long n)
|
|
{
|
|
DBlock *b;
|
|
u64int a;
|
|
u32int blocksize, off, m;
|
|
long nn;
|
|
|
|
if(n == 0)
|
|
return -1;
|
|
|
|
qlock(&arena->lock);
|
|
a = arena->size - arenadirsize(arena, arena->memstats.clumps);
|
|
qunlock(&arena->lock);
|
|
if(aa >= a){
|
|
seterr(EOk, "reading beyond arena clump storage: clumps=%d aa=%lld a=%lld -1 clumps=%lld\n", arena->memstats.clumps, aa, a, arena->size - arenadirsize(arena, arena->memstats.clumps - 1));
|
|
return -1;
|
|
}
|
|
if(aa + n > a)
|
|
n = a - aa;
|
|
|
|
blocksize = arena->blocksize;
|
|
a = arena->base + aa;
|
|
off = a & (blocksize - 1);
|
|
a -= off;
|
|
nn = 0;
|
|
for(;;){
|
|
b = getdblock(arena->part, a, OREAD);
|
|
if(b == nil)
|
|
return -1;
|
|
m = blocksize - off;
|
|
if(m > n - nn)
|
|
m = n - nn;
|
|
memmove(&buf[nn], &b->data[off], m);
|
|
putdblock(b);
|
|
nn += m;
|
|
if(nn == n)
|
|
break;
|
|
off = 0;
|
|
a += blocksize;
|
|
}
|
|
return n;
|
|
}
|
|
|
|
/*
|
|
* write some data to the clump section at a given offset
|
|
* used to fix up corrupted arenas.
|
|
*/
|
|
u32int
|
|
writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n)
|
|
{
|
|
DBlock *b;
|
|
u64int a;
|
|
u32int blocksize, off, m;
|
|
long nn;
|
|
int ok;
|
|
|
|
if(n == 0)
|
|
return -1;
|
|
|
|
qlock(&arena->lock);
|
|
a = arena->size - arenadirsize(arena, arena->memstats.clumps);
|
|
if(aa >= a || aa + n > a){
|
|
qunlock(&arena->lock);
|
|
seterr(EOk, "writing beyond arena clump storage");
|
|
return -1;
|
|
}
|
|
|
|
blocksize = arena->blocksize;
|
|
a = arena->base + aa;
|
|
off = a & (blocksize - 1);
|
|
a -= off;
|
|
nn = 0;
|
|
for(;;){
|
|
b = getdblock(arena->part, a, off != 0 || off + n < blocksize ? ORDWR : OWRITE);
|
|
if(b == nil){
|
|
qunlock(&arena->lock);
|
|
return -1;
|
|
}
|
|
dirtydblock(b, DirtyArena);
|
|
m = blocksize - off;
|
|
if(m > n - nn)
|
|
m = n - nn;
|
|
memmove(&b->data[off], &clbuf[nn], m);
|
|
ok = 0;
|
|
putdblock(b);
|
|
if(ok < 0){
|
|
qunlock(&arena->lock);
|
|
return -1;
|
|
}
|
|
nn += m;
|
|
if(nn == n)
|
|
break;
|
|
off = 0;
|
|
a += blocksize;
|
|
}
|
|
qunlock(&arena->lock);
|
|
return n;
|
|
}
|
|
|
|
/*
|
|
* allocate space for the clump and write it,
|
|
* updating the arena directory
|
|
ZZZ question: should this distinguish between an arena
|
|
filling up and real errors writing the clump?
|
|
*/
|
|
u64int
|
|
writeaclump(Arena *arena, Clump *c, u8int *clbuf)
|
|
{
|
|
DBlock *b;
|
|
u64int a, aa;
|
|
u32int clump, n, nn, m, off, blocksize;
|
|
int ok;
|
|
|
|
n = c->info.size + ClumpSize + U32Size;
|
|
qlock(&arena->lock);
|
|
aa = arena->memstats.used;
|
|
if(arena->memstats.sealed
|
|
|| aa + n + U32Size + arenadirsize(arena, arena->memstats.clumps + 1) > arena->size){
|
|
if(!arena->memstats.sealed){
|
|
logerr(EOk, "seal memstats %s", arena->name);
|
|
arena->memstats.sealed = 1;
|
|
wbarena(arena);
|
|
}
|
|
qunlock(&arena->lock);
|
|
return TWID64;
|
|
}
|
|
if(packclump(c, &clbuf[0], arena->clumpmagic) < 0){
|
|
qunlock(&arena->lock);
|
|
return TWID64;
|
|
}
|
|
|
|
/*
|
|
* write the data out one block at a time
|
|
*/
|
|
blocksize = arena->blocksize;
|
|
a = arena->base + aa;
|
|
off = a & (blocksize - 1);
|
|
a -= off;
|
|
nn = 0;
|
|
for(;;){
|
|
b = getdblock(arena->part, a, off != 0 ? ORDWR : OWRITE);
|
|
if(b == nil){
|
|
qunlock(&arena->lock);
|
|
return TWID64;
|
|
}
|
|
dirtydblock(b, DirtyArena);
|
|
m = blocksize - off;
|
|
if(m > n - nn)
|
|
m = n - nn;
|
|
memmove(&b->data[off], &clbuf[nn], m);
|
|
ok = 0;
|
|
putdblock(b);
|
|
if(ok < 0){
|
|
qunlock(&arena->lock);
|
|
return TWID64;
|
|
}
|
|
nn += m;
|
|
if(nn == n)
|
|
break;
|
|
off = 0;
|
|
a += blocksize;
|
|
}
|
|
|
|
arena->memstats.used += c->info.size + ClumpSize;
|
|
arena->memstats.uncsize += c->info.uncsize;
|
|
if(c->info.size < c->info.uncsize)
|
|
arena->memstats.cclumps++;
|
|
|
|
clump = arena->memstats.clumps;
|
|
if(clump % ArenaCIGSize == 0){
|
|
if(arena->cig == nil){
|
|
loadcig(arena);
|
|
if(arena->cig == nil)
|
|
goto NoCIG;
|
|
}
|
|
/* add aa as start of next cig */
|
|
if(clump/ArenaCIGSize != arena->ncig){
|
|
fprint(2, "bad arena cig computation %s: writing clump %d but %d cigs\n",
|
|
arena->name, clump, arena->ncig);
|
|
arena->ncig = -1;
|
|
vtfree(arena->cig);
|
|
arena->cig = nil;
|
|
goto NoCIG;
|
|
}
|
|
arena->cig = vtrealloc(arena->cig, (arena->ncig+1)*sizeof arena->cig[0]);
|
|
arena->cig[arena->ncig++].offset = aa;
|
|
}
|
|
NoCIG:
|
|
arena->memstats.clumps++;
|
|
|
|
if(arena->memstats.clumps == 0)
|
|
sysfatal("clumps wrapped");
|
|
arena->wtime = now();
|
|
if(arena->ctime == 0)
|
|
arena->ctime = arena->wtime;
|
|
|
|
writeclumpinfo(arena, clump, &c->info);
|
|
wbarena(arena);
|
|
|
|
qunlock(&arena->lock);
|
|
|
|
return aa;
|
|
}
|
|
|
|
int
|
|
atailcmp(ATailStats *a, ATailStats *b)
|
|
{
|
|
/* good test */
|
|
if(a->used < b->used)
|
|
return -1;
|
|
if(a->used > b->used)
|
|
return 1;
|
|
|
|
/* suspect tests - why order this way? (no one cares) */
|
|
if(a->clumps < b->clumps)
|
|
return -1;
|
|
if(a->clumps > b->clumps)
|
|
return 1;
|
|
if(a->cclumps < b->cclumps)
|
|
return -1;
|
|
if(a->cclumps > b->cclumps)
|
|
return 1;
|
|
if(a->uncsize < b->uncsize)
|
|
return -1;
|
|
if(a->uncsize > b->uncsize)
|
|
return 1;
|
|
if(a->sealed < b->sealed)
|
|
return -1;
|
|
if(a->sealed > b->sealed)
|
|
return 1;
|
|
|
|
/* everything matches */
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
setatailstate(AState *as)
|
|
{
|
|
int i, j, osealed;
|
|
Arena *a;
|
|
Index *ix;
|
|
|
|
trace(0, "setatailstate %s 0x%llux clumps %d", as->arena->name, as->aa, as->stats.clumps);
|
|
|
|
/*
|
|
* Look up as->arena to find index.
|
|
*/
|
|
needmainindex(); /* OS X linker */
|
|
ix = mainindex;
|
|
for(i=0; i<ix->narenas; i++)
|
|
if(ix->arenas[i] == as->arena)
|
|
break;
|
|
if(i==ix->narenas || as->aa < ix->amap[i].start || as->aa >= ix->amap[i].stop || as->arena != ix->arenas[i]){
|
|
fprint(2, "funny settailstate 0x%llux\n", as->aa);
|
|
return;
|
|
}
|
|
|
|
for(j=0; j<=i; j++){
|
|
a = ix->arenas[j];
|
|
if(atailcmp(&a->diskstats, &a->memstats) == 0)
|
|
continue;
|
|
qlock(&a->lock);
|
|
osealed = a->diskstats.sealed;
|
|
if(j == i)
|
|
a->diskstats = as->stats;
|
|
else
|
|
a->diskstats = a->memstats;
|
|
wbarena(a);
|
|
if(a->diskstats.sealed != osealed && !a->inqueue)
|
|
sealarena(a);
|
|
qunlock(&a->lock);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* once sealed, an arena never has any data added to it.
|
|
* it should only be changed to fix errors.
|
|
* this also syncs the clump directory.
|
|
*/
|
|
static void
|
|
sealarena(Arena *arena)
|
|
{
|
|
arena->inqueue = 1;
|
|
backsumarena(arena);
|
|
}
|
|
|
|
void
|
|
backsumarena(Arena *arena)
|
|
{
|
|
ASum *as;
|
|
|
|
as = MK(ASum);
|
|
if(as == nil)
|
|
return;
|
|
qlock(&sumlock);
|
|
as->arena = arena;
|
|
as->next = nil;
|
|
if(sumq)
|
|
sumqtail->next = as;
|
|
else
|
|
sumq = as;
|
|
sumqtail = as;
|
|
/*
|
|
* Might get here while initializing arenas,
|
|
* before initarenasum has been called.
|
|
*/
|
|
if(sumwait.l)
|
|
rwakeup(&sumwait);
|
|
qunlock(&sumlock);
|
|
}
|
|
|
|
static void
|
|
sumproc(void *unused)
|
|
{
|
|
ASum *as;
|
|
Arena *arena;
|
|
|
|
USED(unused);
|
|
|
|
for(;;){
|
|
qlock(&sumlock);
|
|
while(sumq == nil)
|
|
rsleep(&sumwait);
|
|
as = sumq;
|
|
sumq = as->next;
|
|
qunlock(&sumlock);
|
|
arena = as->arena;
|
|
free(as);
|
|
sumarena(arena);
|
|
}
|
|
}
|
|
|
|
void
|
|
sumarena(Arena *arena)
|
|
{
|
|
ZBlock *b;
|
|
DigestState s;
|
|
u64int a, e;
|
|
u32int bs;
|
|
int t;
|
|
u8int score[VtScoreSize];
|
|
|
|
bs = MaxIoSize;
|
|
if(bs < arena->blocksize)
|
|
bs = arena->blocksize;
|
|
|
|
/*
|
|
* read & sum all blocks except the last one
|
|
*/
|
|
flushdcache();
|
|
memset(&s, 0, sizeof s);
|
|
b = alloczblock(bs, 0, arena->part->blocksize);
|
|
e = arena->base + arena->size;
|
|
for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a += bs){
|
|
disksched();
|
|
while((t=arenasumsleeptime) == SleepForever){
|
|
sleep(1000);
|
|
disksched();
|
|
}
|
|
sleep(t);
|
|
if(a + bs > e)
|
|
bs = arena->blocksize;
|
|
if(readpart(arena->part, a, b->data, bs) < 0)
|
|
goto ReadErr;
|
|
addstat(StatSumRead, 1);
|
|
addstat(StatSumReadBytes, bs);
|
|
sha1(b->data, bs, nil, &s);
|
|
}
|
|
|
|
/*
|
|
* the last one is special, since it may already have the checksum included
|
|
*/
|
|
bs = arena->blocksize;
|
|
if(readpart(arena->part, e, b->data, bs) < 0){
|
|
ReadErr:
|
|
logerr(EOk, "sumarena can't sum %s, read at %lld failed: %r", arena->name, a);
|
|
freezblock(b);
|
|
return;
|
|
}
|
|
addstat(StatSumRead, 1);
|
|
addstat(StatSumReadBytes, bs);
|
|
|
|
sha1(b->data, bs-VtScoreSize, nil, &s);
|
|
sha1(zeroscore, VtScoreSize, nil, &s);
|
|
sha1(nil, 0, score, &s);
|
|
|
|
/*
|
|
* check for no checksum or the same
|
|
*/
|
|
if(scorecmp(score, &b->data[bs - VtScoreSize]) != 0
|
|
&& scorecmp(zeroscore, &b->data[bs - VtScoreSize]) != 0)
|
|
logerr(EOk, "overwriting mismatched checksums for arena=%s, found=%V calculated=%V",
|
|
arena->name, &b->data[bs - VtScoreSize], score);
|
|
freezblock(b);
|
|
|
|
qlock(&arena->lock);
|
|
scorecp(arena->score, score);
|
|
wbarena(arena);
|
|
qunlock(&arena->lock);
|
|
}
|
|
|
|
/*
|
|
* write the arena trailer block to the partition
|
|
*/
|
|
int
|
|
wbarena(Arena *arena)
|
|
{
|
|
DBlock *b;
|
|
int bad;
|
|
|
|
if((b = getdblock(arena->part, arena->base + arena->size, OWRITE)) == nil){
|
|
logerr(EAdmin, "can't write arena trailer: %r");
|
|
return -1;
|
|
}
|
|
dirtydblock(b, DirtyArenaTrailer);
|
|
bad = okarena(arena)<0 || packarena(arena, b->data)<0;
|
|
scorecp(b->data + arena->blocksize - VtScoreSize, arena->score);
|
|
putdblock(b);
|
|
if(bad)
|
|
return -1;
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
wbarenahead(Arena *arena)
|
|
{
|
|
ZBlock *b;
|
|
ArenaHead head;
|
|
int bad;
|
|
|
|
namecp(head.name, arena->name);
|
|
head.version = arena->version;
|
|
head.size = arena->size + 2 * arena->blocksize;
|
|
head.blocksize = arena->blocksize;
|
|
head.clumpmagic = arena->clumpmagic;
|
|
b = alloczblock(arena->blocksize, 1, arena->part->blocksize);
|
|
if(b == nil){
|
|
logerr(EAdmin, "can't write arena header: %r");
|
|
/* ZZZ add error message? */
|
|
return -1;
|
|
}
|
|
/*
|
|
* this writepart is okay because it only happens
|
|
* during initialization.
|
|
*/
|
|
bad = packarenahead(&head, b->data)<0 ||
|
|
writepart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize)<0 ||
|
|
flushpart(arena->part)<0;
|
|
freezblock(b);
|
|
if(bad)
|
|
return -1;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* read the arena header and trailer blocks from disk
|
|
*/
|
|
static int
|
|
loadarena(Arena *arena)
|
|
{
|
|
ArenaHead head;
|
|
ZBlock *b;
|
|
|
|
b = alloczblock(arena->blocksize, 0, arena->part->blocksize);
|
|
if(b == nil)
|
|
return -1;
|
|
if(readpart(arena->part, arena->base + arena->size, b->data, arena->blocksize) < 0){
|
|
freezblock(b);
|
|
return -1;
|
|
}
|
|
if(unpackarena(arena, b->data) < 0){
|
|
freezblock(b);
|
|
return -1;
|
|
}
|
|
if(arena->version != ArenaVersion4 && arena->version != ArenaVersion5){
|
|
seterr(EAdmin, "unknown arena version %d", arena->version);
|
|
freezblock(b);
|
|
return -1;
|
|
}
|
|
scorecp(arena->score, &b->data[arena->blocksize - VtScoreSize]);
|
|
|
|
if(readpart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize) < 0){
|
|
logerr(EAdmin, "can't read arena header: %r");
|
|
freezblock(b);
|
|
return 0;
|
|
}
|
|
if(unpackarenahead(&head, b->data) < 0)
|
|
logerr(ECorrupt, "corrupted arena header: %r");
|
|
else if(namecmp(arena->name, head.name)!=0
|
|
|| arena->clumpmagic != head.clumpmagic
|
|
|| arena->version != head.version
|
|
|| arena->blocksize != head.blocksize
|
|
|| arena->size + 2 * arena->blocksize != head.size){
|
|
if(namecmp(arena->name, head.name)!=0)
|
|
logerr(ECorrupt, "arena tail name %s head %s",
|
|
arena->name, head.name);
|
|
else if(arena->clumpmagic != head.clumpmagic)
|
|
logerr(ECorrupt, "arena tail clumpmagic 0x%lux head 0x%lux",
|
|
(ulong)arena->clumpmagic, (ulong)head.clumpmagic);
|
|
else if(arena->version != head.version)
|
|
logerr(ECorrupt, "arena tail version %d head version %d",
|
|
arena->version, head.version);
|
|
else if(arena->blocksize != head.blocksize)
|
|
logerr(ECorrupt, "arena tail block size %d head %d",
|
|
arena->blocksize, head.blocksize);
|
|
else if(arena->size+2*arena->blocksize != head.size)
|
|
logerr(ECorrupt, "arena tail size %lud head %lud",
|
|
(ulong)arena->size+2*arena->blocksize, head.size);
|
|
else
|
|
logerr(ECorrupt, "arena header inconsistent with arena data");
|
|
}
|
|
freezblock(b);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
okarena(Arena *arena)
|
|
{
|
|
u64int dsize;
|
|
int ok;
|
|
|
|
ok = 0;
|
|
dsize = arenadirsize(arena, arena->diskstats.clumps);
|
|
if(arena->diskstats.used + dsize > arena->size){
|
|
seterr(ECorrupt, "arena %s used > size", arena->name);
|
|
ok = -1;
|
|
}
|
|
|
|
if(arena->diskstats.cclumps > arena->diskstats.clumps)
|
|
logerr(ECorrupt, "arena %s has more compressed clumps than total clumps", arena->name);
|
|
|
|
/*
|
|
* This need not be true if some of the disk is corrupted.
|
|
*
|
|
if(arena->diskstats.uncsize + arena->diskstats.clumps * ClumpSize + arena->blocksize < arena->diskstats.used)
|
|
logerr(ECorrupt, "arena %s uncompressed size inconsistent with used space %lld %d %lld", arena->name, arena->diskstats.uncsize, arena->diskstats.clumps, arena->diskstats.used);
|
|
*/
|
|
|
|
if(arena->ctime > arena->wtime)
|
|
logerr(ECorrupt, "arena %s creation time after last write time", arena->name);
|
|
|
|
return ok;
|
|
}
|
|
|
|
static CIBlock*
|
|
getcib(Arena *arena, int clump, int writing, CIBlock *rock)
|
|
{
|
|
int mode;
|
|
CIBlock *cib;
|
|
u32int block, off;
|
|
|
|
if(clump >= arena->memstats.clumps){
|
|
seterr(EOk, "clump directory access out of range");
|
|
return nil;
|
|
}
|
|
block = clump / arena->clumpmax;
|
|
off = (clump - block * arena->clumpmax) * ClumpInfoSize;
|
|
cib = rock;
|
|
cib->block = block;
|
|
cib->offset = off;
|
|
|
|
if(writing){
|
|
if(off == 0 && clump == arena->memstats.clumps-1)
|
|
mode = OWRITE;
|
|
else
|
|
mode = ORDWR;
|
|
}else
|
|
mode = OREAD;
|
|
|
|
cib->data = getdblock(arena->part,
|
|
arena->base + arena->size - (block + 1) * arena->blocksize, mode);
|
|
if(cib->data == nil)
|
|
return nil;
|
|
return cib;
|
|
}
|
|
|
|
static void
|
|
putcib(Arena *arena, CIBlock *cib)
|
|
{
|
|
USED(arena);
|
|
|
|
putdblock(cib->data);
|
|
cib->data = nil;
|
|
}
|
|
|
|
|
|
/*
|
|
* For index entry readahead purposes, the arenas are
|
|
* broken into smaller subpieces, called clump info groups
|
|
* or cigs. Each cig has ArenaCIGSize clumps (ArenaCIGSize
|
|
* is chosen to make the index entries take up about half
|
|
* a megabyte). The index entries do not contain enough
|
|
* information to determine what the clump index is for
|
|
* a given address in an arena. That info is needed both for
|
|
* figuring out which clump group an address belongs to
|
|
* and for prefetching a clump group's index entries from
|
|
* the arena table of contents. The first time clump groups
|
|
* are accessed, we scan the entire arena table of contents
|
|
* (which might be 10s of megabytes), recording the data
|
|
* offset of each clump group.
|
|
*/
|
|
|
|
/*
|
|
* load clump info group information by scanning entire toc.
|
|
*/
|
|
static void
|
|
loadcig(Arena *arena)
|
|
{
|
|
u32int i, j, ncig, nci;
|
|
ArenaCIG *cig;
|
|
ClumpInfo *ci;
|
|
u64int offset;
|
|
int ms;
|
|
|
|
if(arena->cig || arena->ncig < 0)
|
|
return;
|
|
|
|
// fprint(2, "loadcig %s\n", arena->name);
|
|
|
|
ncig = (arena->memstats.clumps+ArenaCIGSize-1) / ArenaCIGSize;
|
|
if(ncig == 0){
|
|
arena->cig = vtmalloc(1);
|
|
arena->ncig = 0;
|
|
return;
|
|
}
|
|
|
|
ms = msec();
|
|
cig = vtmalloc(ncig*sizeof cig[0]);
|
|
ci = vtmalloc(ArenaCIGSize*sizeof ci[0]);
|
|
offset = 0;
|
|
for(i=0; i<ncig; i++){
|
|
nci = readclumpinfos(arena, i*ArenaCIGSize, ci, ArenaCIGSize);
|
|
cig[i].offset = offset;
|
|
for(j=0; j<nci; j++)
|
|
offset += ClumpSize + ci[j].size;
|
|
if(nci < ArenaCIGSize){
|
|
if(i != ncig-1){
|
|
vtfree(ci);
|
|
vtfree(cig);
|
|
arena->ncig = -1;
|
|
fprint(2, "loadcig %s: got %ud cigs, expected %ud\n", arena->name, i+1, ncig);
|
|
goto out;
|
|
}
|
|
}
|
|
}
|
|
vtfree(ci);
|
|
|
|
arena->ncig = ncig;
|
|
arena->cig = cig;
|
|
|
|
out:
|
|
ms = msec() - ms;
|
|
addstat2(StatCigLoad, 1, StatCigLoadTime, ms);
|
|
}
|
|
|
|
/*
|
|
* convert arena address into arena group + data boundaries.
|
|
*/
|
|
int
|
|
arenatog(Arena *arena, u64int addr, u64int *gstart, u64int *glimit, int *g)
|
|
{
|
|
int r, l, m;
|
|
|
|
qlock(&arena->lock);
|
|
if(arena->cig == nil)
|
|
loadcig(arena);
|
|
if(arena->cig == nil || arena->ncig == 0){
|
|
qunlock(&arena->lock);
|
|
return -1;
|
|
}
|
|
|
|
l = 1;
|
|
r = arena->ncig - 1;
|
|
while(l <= r){
|
|
m = (r + l) / 2;
|
|
if(arena->cig[m].offset <= addr)
|
|
l = m + 1;
|
|
else
|
|
r = m - 1;
|
|
}
|
|
l--;
|
|
|
|
*g = l;
|
|
*gstart = arena->cig[l].offset;
|
|
if(l+1 < arena->ncig)
|
|
*glimit = arena->cig[l+1].offset;
|
|
else
|
|
*glimit = arena->memstats.used;
|
|
qunlock(&arena->lock);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* load the clump info for group g into the index entries.
|
|
*/
|
|
int
|
|
asumload(Arena *arena, int g, IEntry *entries, int nentries)
|
|
{
|
|
int i, base, limit;
|
|
u64int addr;
|
|
ClumpInfo ci;
|
|
IEntry *ie;
|
|
|
|
if(nentries < ArenaCIGSize){
|
|
fprint(2, "asking for too few entries\n");
|
|
return -1;
|
|
}
|
|
|
|
qlock(&arena->lock);
|
|
if(arena->cig == nil)
|
|
loadcig(arena);
|
|
if(arena->cig == nil || arena->ncig == 0 || g >= arena->ncig){
|
|
qunlock(&arena->lock);
|
|
return -1;
|
|
}
|
|
|
|
addr = 0;
|
|
base = g*ArenaCIGSize;
|
|
limit = base + ArenaCIGSize;
|
|
if(base > arena->memstats.clumps)
|
|
base = arena->memstats.clumps;
|
|
ie = entries;
|
|
for(i=base; i<limit; i++){
|
|
if(readclumpinfo(arena, i, &ci) < 0)
|
|
break;
|
|
if(ci.type != VtCorruptType){
|
|
scorecp(ie->score, ci.score);
|
|
ie->ia.type = ci.type;
|
|
ie->ia.size = ci.uncsize;
|
|
ie->ia.blocks = (ci.size + ClumpSize + (1<<ABlockLog) - 1) >> ABlockLog;
|
|
ie->ia.addr = addr;
|
|
ie++;
|
|
}
|
|
addr += ClumpSize + ci.size;
|
|
}
|
|
qunlock(&arena->lock);
|
|
return ie - entries;
|
|
}
|