1
0
Fork 0
arangodb/arangod/FulltextIndex2/zstr.c

1282 lines
27 KiB
C

/* zstr.c - the Z-string module */
/* R. A. Parker 3.12.2012 */
/* bugfixed in merge - adjtop call added */
/* bugfixed in tuber - wraparound */
/* bugfix shift of 64 not happening */
/* bugfix peeking after last word */
/* bugfix use firstix not z->firstix */
/* bugfix page turn */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "zstr.h"
/* ZStrCons - allocate an empty Z-string with room for elts words.     */
/* elts is the initial capacity, in 64-bit words; ZStrBitsIn grows it  */
/* on demand.  A request for fewer than one word is rounded up to one  */
/* because word 0 is always initialised here.                          */
/* Returns the new Z-string, or NULL if any allocation fails.          */
/* The caller releases it with ZStrDest.                               */
ZSTR * ZStrCons(int elts)
{
    ZSTR * z;

    if (elts < 1)
        elts = 1;   /* dat[0] and dlen[0] are written below */
    z = malloc(sizeof(ZSTR));
    if (z == NULL)
        return NULL;
    z->dat = malloc((size_t)elts * sizeof(uint64_t));
    if (z->dat == NULL) {
        free(z);
        return NULL;
    }
    z->dlen = malloc((size_t)elts * sizeof(long));
    if (z->dlen == NULL) {
        free(z->dat);
        free(z);
        return NULL;
    }
    z->alloc = elts;
    z->firstix = 0;
    z->lastix = 0;
    z->dat[0] = 0;
    z->dlen[0] = 0;
    return z;
}
/* ZStrDest - release a Z-string created by ZStrCons.                  */
/* Passing NULL is a no-op, mirroring free().                          */
void ZStrDest(ZSTR * z)
{
    if (z == NULL)
        return;
    free(z->dat);
    free(z->dlen);
    free(z);
}
/* ZStrClear - reset a Z-string to the empty string.                   */
/* Both word indices return to 0 and word 0 is emptied; the allocated  */
/* capacity is kept.                                                   */
void ZStrClear(ZSTR * z)
{
    z->dlen[0] = 0;
    z->dat[0] = 0;
    z->lastix = 0;
    z->firstix = 0;
}
/* ZStrBitsIn - append the low `bits` bits of a to the end of z.       */
/* The bits go into the last word if they fit in its 64 bits,          */
/* otherwise a new word is started (growing the arrays if needed).     */
/* Returns 0 on success, 1 if memory could not be obtained; on failure */
/* z is left unchanged and still consistent.                           */
int ZStrBitsIn(uint64_t a, long bits, ZSTR * z)
{
    long clen;
    long newalloc;
    void * ptr;

    clen = z->dlen[z->lastix];
    if (clen + bits <= 64) {
        /* 64 bits can only fit into an empty word; store directly   */
        /* because shifting a uint64_t by 64 is undefined behaviour. */
        if (bits >= 64)
            z->dat[z->lastix] = a;
        else
            z->dat[z->lastix] = (z->dat[z->lastix] << bits) + a;
        z->dlen[z->lastix] = clen + bits;
        return 0;
    }

    if (z->lastix + 1 >= z->alloc) {
        /* Grow by about 25%.  z->alloc is only updated once BOTH   */
        /* arrays have been enlarged, so a failed realloc never     */
        /* leaves the recorded capacity larger than the real one.   */
        newalloc = z->alloc + z->alloc / 4 + 2;
        ptr = realloc(z->dat, (size_t)newalloc * sizeof(uint64_t));
        if (ptr == NULL)
            return 1;
        z->dat = ptr;
        ptr = realloc(z->dlen, (size_t)newalloc * sizeof(long));
        if (ptr == NULL)
            return 1;
        z->dlen = ptr;
        z->alloc = newalloc;
    }
    z->lastix++;
    z->dat[z->lastix] = a;
    z->dlen[z->lastix] = bits;
    return 0;
}
/* ZStrBitsOut - remove the first `bits` bits from z and return them.  */
/* The bits are returned right-aligned.  If z holds fewer than `bits`  */
/* bits, the result is padded on the right with zero bits and z ends   */
/* up empty.  `bits` is expected to be in the range 0..64.             */
uint64_t ZStrBitsOut(ZSTR * z, long bits)
{
    uint64_t s,t;
    long slen,wlen,dl;

    s = 0;
    slen = 0;
    /* consume whole words while they fit entirely in the request */
    while (slen + z->dlen[z->firstix] <= bits) {
        dl = z->dlen[z->firstix];
        /* a shift by 64 is undefined in C; the intended result is 0 */
        s = (dl < 64) ? (s << dl) : 0;
        s += z->dat[z->firstix];
        slen += dl;
        if (z->firstix == z->lastix) {
            /* string exhausted: empty the word and zero-pad */
            z->dlen[z->firstix] = 0;
            z->dat[z->firstix] = 0;
            wlen = bits - slen;
            return (wlen < 64) ? (s << wlen) : 0;
        }
        z->firstix++;
    }
    wlen = bits - slen;     /* bits still wanted from the next word */
    if (wlen == 0)
        return s;
    s <<= wlen;
    /* take the top wlen bits of the current word ... */
    t = z->dat[z->firstix] >> (z->dlen[z->firstix] - wlen);
    s += t;
    /* ... and clear them from the word, shortening it */
    z->dat[z->firstix] ^= (t << (z->dlen[z->firstix] - wlen));
    z->dlen[z->firstix] -= wlen;
    return s;
}
/* ZStrBitsPeek - return the first `bits` bits of z without removing   */
/* them.  Same result as ZStrBitsOut would give (right-aligned,        */
/* zero-padded when z is shorter than `bits`), but z is not modified.  */
/* `bits` is expected to be in the range 0..64.                        */
uint64_t ZStrBitsPeek(ZSTR * z, long bits)
{
    uint64_t s;
    int firstix;
    long slen,wlen,dl;

    s = 0;
    slen = 0;
    firstix = z->firstix;   /* local cursor; z->firstix stays put */
    while (slen + z->dlen[firstix] <= bits) {
        dl = z->dlen[firstix];
        /* a shift by 64 is undefined in C; the intended result is 0 */
        s = (dl < 64) ? (s << dl) : 0;
        s += z->dat[firstix];
        slen += dl;
        /* bugfix peeking after last word */
        if (firstix == z->lastix) {
            wlen = bits - slen;
            return (wlen < 64) ? (s << wlen) : 0;
        }
        firstix++;
    }
    wlen = bits - slen;     /* bits still wanted from the next word */
    if (wlen == 0)
        return s;
    s <<= wlen;
    /* bugfix use firstix not z->firstix */
    s += z->dat[firstix] >> (z->dlen[firstix] - wlen);
    return s;
}
/* ZStrLen - total number of bits currently held in z, i.e. the sum of */
/* the word lengths from firstix to lastix inclusive.                  */
long ZStrLen(ZSTR * z)
{
    long total = 0;
    int ix = z->firstix;

    while (ix <= z->lastix) {
        total += z->dlen[ix];
        ix++;
    }
    return total;
}
/* ZStrNormalize - strip trailing zero bits from z.                    */
/* Trailing all-zero words are dropped first; if the remaining last    */
/* word is zero too, its length becomes 0.  Otherwise the last word is */
/* shifted right until its lowest bit is 1.                            */
void ZStrNormalize(ZSTR * z)
{
    uint64_t word;

    while (z->lastix > z->firstix && z->dat[z->lastix] == 0)
        z->lastix--;

    if (z->dat[z->lastix] == 0) {
        z->dlen[z->lastix] = 0;
        return;
    }

    for (word = z->dat[z->lastix]; (word & 1) == 0; word >>= 1)
        z->dlen[z->lastix]--;
    z->dat[z->lastix] = word;
}
/* ZStrEnc - encode the value a with code zc and append it to z.       */
/* Code type 2 first translates a through zc->TX when a <= zc->tmax;   */
/* types 1 and 2 then share the same segment encoding: find the last   */
/* segment whose start X[seg] does not exceed a and emit               */
/* a - X[seg] + C[seg] in L[seg] bits.                                 */
/* Returns the ZStrBitsIn result (0 on success).  Any other code type  */
/* prints a message and exits with status 16.                          */
int ZStrEnc(ZSTR * z, ZCOD * zc, uint64_t a)
{
    int seg;

    switch (zc->t) {
    case 2:
        if (a <= zc->tmax)
            a = zc->TX[a];
        /* fallthrough */
    case 1:
        seg = 0;
        while (seg < zc->s && !(a < zc->X[seg + 1]))
            seg++;
        return ZStrBitsIn(a - zc->X[seg] + zc->C[seg], zc->L[seg], z);
    default:
        printf("invalid ZCOD type %d\n",zc->t);
        exit(16);
    }
}
/* ZStrDec - remove one value encoded with code zc from the front of   */
/* z and return it.  The leading zc->bits bits select the segment via  */
/* zc->SG; the segment's L[seg] bits are then consumed and mapped back */
/* to a value.  Code type 2 additionally un-translates results that    */
/* are <= zc->tmax through zc->UX.  Any code type other than 1 or 2    */
/* prints a message and exits with status 18.                          */
uint64_t ZStrDec(ZSTR * z, ZCOD * zc)
{
    int seg;
    uint64_t lead,val;

    if (zc->t != 1 && zc->t != 2) {
        printf("invalid ZCOD type %d\n",zc->t);
        exit(18);
    }

    lead = ZStrBitsPeek(z, zc->bits);
    seg = zc->SG[lead];
    val = ZStrBitsOut(z, zc->L[seg]);
    val = (val - zc->C[seg]) + zc->X[seg];
    if (zc->t == 2 && val <= zc->tmax)
        val = zc->UX[val];
    return val;
}
/* ZStrXlate - translate a through zc->TX when a <= zc->tmax;          */
/* larger values are returned unchanged.                               */
uint64_t ZStrXlate(ZCOD * zc, uint64_t a)
{
    return (a <= zc->tmax) ? zc->TX[a] : a;
}
/* ZStrUnXl - translate a through zc->UX when a <= zc->tmax;           */
/* larger values are returned unchanged.                               */
uint64_t ZStrUnXl(ZCOD * zc, uint64_t a)
{
    return (a <= zc->tmax) ? zc->UX[a] : a;
}
/* ZStrLastEnc - append the final value a to z.                        */
/* Zero appends nothing.  Otherwise the leading 1 bit of a is dropped  */
/* and the remaining bits followed by a single 1 bit are appended, so  */
/* the field is as wide as a's binary representation.                  */
/* Returns the ZStrBitsIn result (0 on success).                       */
int ZStrLastEnc(ZSTR * z, uint64_t a)
{
    uint64_t rest;
    long width = 1;

    if (a == 0)
        return 0;

    /* width = number of significant bits in a; rest ends up as 1 */
    for (rest = a; rest > 1; rest >>= 1)
        width++;

    a -= rest << (width - 1);       /* strip the top bit */
    return ZStrBitsIn((a << 1) + 1, width, z);
}
/* ZStrLastDec - decode the final value: the inverse of ZStrLastEnc.   */
/* Everything left in z is consumed.  An empty z decodes to 0.         */
/* Otherwise the remaining len bits have their trailing 1 bit dropped  */
/* and the implicit leading 1 bit restored.                            */
/* At most 64 bits may remain in z (one ZStrBitsOut call).             */
uint64_t ZStrLastDec(ZSTR * z)
{
    long len;
    uint64_t num;

    len = ZStrLen(z);
    if (len == 0)
        return 0;
    num = ZStrBitsOut(z, len);
    /* (num + 2^len) >> 1 written so that neither the shift nor the   */
    /* addition overflows when len is 64.                             */
    return (num >> 1) + ((uint64_t)1 << (len - 1));
}
/* ZStrCxClear - reset the coding context: the remembered previous     */
/* value x1 becomes 0.  zc is not consulted.                           */
void ZStrCxClear(ZCOD * zc, CTX * ctx)
{
    (void)zc;
    ctx->x1 = 0;
}
/* ZStrCxEnc - context-aware encode of a onto the end of z.            */
/* Code types 1 and 2 ignore the context and defer to ZStrEnc.         */
/* Type 3 encodes the difference between a and the previous value      */
/* held in ctx->x1, then remembers a as the new previous value.        */
/* Returns the ZStrBitsIn/ZStrEnc result (0 on success).  Any other    */
/* code type prints a message and exits with status 17.                */
int ZStrCxEnc(ZSTR * z, ZCOD * zc, CTX * ctx, uint64_t a)
{
    int seg;
    uint64_t delta;

    switch (zc->t) {
    case 1:
    case 2:
        return ZStrEnc(z,zc,a);
    case 3:
        break;
    default:
        printf("invalid ZCOD type %d\n",zc->t);
        exit(17);
    }

    delta = a - ctx->x1;
    ctx->x1 = a;
    /* last segment whose start does not exceed delta */
    seg = 0;
    while (seg < zc->s && !(delta < zc->X[seg + 1]))
        seg++;
    return ZStrBitsIn(delta - zc->X[seg] + zc->C[seg], zc->L[seg], z);
}
/* ZStrCxDec - context-aware decode of the next value from z.          */
/* Code types 1 and 2 ignore the context and defer to ZStrDec.         */
/* Type 3 decodes a difference, adds it to ctx->x1 and returns the     */
/* updated ctx->x1.  Any other code type prints a message and exits    */
/* with status 17.                                                     */
uint64_t ZStrCxDec(ZSTR * z, ZCOD * zc, CTX * ctx)
{
    int seg;
    uint64_t lead,delta;

    switch (zc->t) {
    case 1:
    case 2:
        return ZStrDec(z,zc);
    case 3:
        break;
    default:
        printf("invalid ZCOD type %d\n",zc->t);
        exit(17);
    }

    lead = ZStrBitsPeek(z, zc->bits);
    seg = zc->SG[lead];
    delta = ZStrBitsOut(z, zc->L[seg]);
    delta = (delta - zc->C[seg]) + zc->X[seg];
    ctx->x1 += delta;
    return ctx->x1;
}
/* ZStrMaxLen - upper bound on the number of 16-bit words that         */
/* ZStrExtract will write for z in the given format.                   */
/* Only format 2 is known (one word per 15 bits, plus one); any other  */
/* format prints a message and exits with status 33.                   */
int ZStrMaxLen(ZSTR * z, int fmt)
{
    uint64_t bitsperword;

    if (fmt != 2) {
        printf("unknown format %d in ZStrMaxLen\n",fmt);
        exit(33);
    }
    bitsperword = 15;
    return 1 + (ZStrLen(z) / bitsperword);
}
/* ZStrExtract - pack z into an array of 16-bit words at x (format 2). */
/* z is normalized and then consumed.  Every word except the last has  */
/* non-zero low two bits and carries 15 or 16 bits of the string; the  */
/* last word has its low two bits 00 and carries the final 14 bits.    */
/* Returns the number of words written, or 0 (after printing a         */
/* message) if fmt is not 2.  x must have room for ZStrMaxLen words.   */
int ZStrExtract(ZSTR * z, void * x, int fmt)
{
    uint16_t * out;
    uint64_t chunk;
    int remaining;
    int count;

    if (fmt != 2) {
        printf("Format %d not known in ZStrExtract\n",fmt);
        return 0;
    }

    out = (uint16_t *)x;
    ZStrNormalize(z);
    remaining = ZStrLen(z);
    for (count = 1; remaining > 14; count++) {
        if ((ZStrBitsPeek(z,15) & 1) == 1) {
            /* 15th bit is 1: word holds 15 bits, low bits become 11 */
            chunk = ZStrBitsOut(z,15);
            *(out++) = 1 + (chunk << 1);
            remaining -= 15;
        } else {
            /* 15th bit is 0: word holds 16 bits, stored as value+1 */
            chunk = ZStrBitsOut(z,16);
            *(out++) = 1 + chunk;
            /* looks unsafe, but all non-zero z-strings have their   */
            /* last bit 1, so if 15 bits remain the other case applies */
            remaining -= 16;
        }
    }
    chunk = ZStrBitsOut(z,14);
    *out = chunk << 2;
    return count;
}
/* ZStrInsert - load z from the 16-bit word array at x (format 2),     */
/* the inverse of ZStrExtract.  z is cleared first.  The low two bits  */
/* of each word give its kind: 00 = last word (14 bits), 11 = 15 bits, */
/* anything else = 16 bits stored as value+1.                          */
/* Returns 0 on success, the ZStrBitsIn error code on allocation       */
/* failure, or 1 if fmt is not 2.                                      */
int ZStrInsert(ZSTR * z, void * x, int fmt)
{
    const uint16_t * in;
    uint64_t word;
    int rc;

    if (fmt != 2)
        return 1;

    in = x;
    ZStrClear(z);
    for (;;) {
        word = *(in++);
        switch (word & 3) {
        case 0:
            rc = ZStrBitsIn(word >> 2, 14, z);
            if (rc != 0)
                return rc;
            ZStrNormalize(z);
            return 0;
        case 3:
            rc = ZStrBitsIn(word >> 1, 15, z);
            break;
        default:
            rc = ZStrBitsIn(word - 1, 16, z);
            break;
        }
        if (rc != 0)
            return rc;
    }
}
/* ZStrExtLen - number of 16-bit words in the extracted string at x,   */
/* counting up to and including the first word whose low two bits are  */
/* 00.  fmt is not consulted.                                          */
int ZStrExtLen(void * x, int fmt)
{
    const uint16_t * word = x;
    int count = 1;

    (void)fmt;
    for (; (*word & 3) != 0; word++)
        count++;
    return count;
}
STEX * ZStrSTCons(int fmt)
{
STEX * st;
int i;
st=malloc(sizeof(STEX));
if(st==NULL) return NULL;
st->pst=malloc(1281*sizeof(uint16_t *));
if(st->pst==NULL)
{
free(st);
return NULL;
}
st->ptp=malloc(1281*sizeof(uint16_t *));
if(st->ptp==NULL)
{
free(st->pst);
free(st);
return NULL;
}
st->mal=malloc(1281*sizeof(uint64_t));
if(st->mal==NULL)
{
free(st->ptp);
free(st->pst);
free(st);
return NULL;
}
st->stcnt=malloc(1281*sizeof(uint64_t));
if(st->stcnt==NULL)
{
free(st->mal);
free(st->ptp);
free(st->pst);
free(st);
return NULL;
}
for(i=0;i<1281;i++)
st->mal[i]=0;
for(i=0;i<6;i++) st->inuse[i]=0;
st->listm=0;
return st;
}
/* ZStrSTDest - release a STEX and everything it owns: each slot       */
/* buffer with a non-zero allocation size, the sorted list if one is   */
/* owned (listm non-zero), and the four tables.                        */
/* Passing NULL is a no-op, mirroring free().                          */
void ZStrSTDest(STEX * st)
{
    int i;

    if (st == NULL)
        return;
    for (i = 0; i < 1281; i++) {
        if (st->mal[i] != 0)
            free(st->pst[i]);
    }
    if (st->listm != 0)
        free(st->list);
    free(st->pst);
    free(st->ptp);
    free(st->mal);
    free(st->stcnt);
    free(st);
}
/* ZStrExtCompare - three-way compare of two extracted strings.        */
/* The strings are compared 16-bit word by word as unsigned numbers.   */
/* Returns -1 if a sorts before b, 1 if after, 0 if they are equal.    */
/* A string ends at the first word whose low two bits are 00; because  */
/* the words are known to be equal when that test is made, both        */
/* strings necessarily end together.  fmt is not consulted.            */
int ZStrExtCompare(void * a, void * b, int fmt)
{
    const uint16_t * wa = a;
    const uint16_t * wb = b;

    (void)fmt;
    for (;; wa++, wb++) {
        if (*wa < *wb)
            return -1;
        if (*wa > *wb)
            return 1;
        if ((*wa & 3) == 0)     /* equal terminator word */
            return 0;
    }
}
/* SICH - working state for one merge pass over a STEX.                */
/* merge() fills pq[1..255] and the lower ch[] entries; adjtop(),      */
/* heapcomp() and pqadvance() operate on it.                           */
typedef struct
{
STEX * st; /* the STEX whose slots are being merged */
uint16_t pq[256]; /* heap node -> STEX slot number, or EXPIRED */
uint16_t ch[128]; /* 0 or 1: which child (2h or 2h+1) adjtop follows from node h */
} SICH;
#define DEBUG
#ifdef DEBUG
/* dumpheap - debugging aid: print the first 50 heap nodes of si.      */
/* For each node it shows the slot number in pq, the child selector    */
/* ch (or -1 for nodes >= 128, which have none) and the first word of  */
/* the slot's buffer (0xABCD when pq is not a slot number below 1280). */
void dumpheap(SICH * si)
{
    STEX * st = si->st;
    int node;

    for (node = 1; node <= 50; node++) {
        int child = (node < 128) ? si->ch[node] : -1;
        int word = 0xABCD;

        if (si->pq[node] < 1280)
            word = *(st->pst[si->pq[node]]);
        printf("nd %3d pq %3d ch %3d dt %x\n",
               node, si->pq[node], child, word);
    }
}
#endif
/* the first letter of each variable name shows what kind of index it is . . . */
/* h int 1-255 index si (pq,ch) heap numbers */
/* si look them up in SICH pq and you get */
/* s uint16_t 0-1278 index st. slot numbers */
/* st look them up in STEX st (pst,etc) */
#define EXPIRED 10000
/* pqadvance - step the slot at heap node htop on to its next string.  */
/* The slot's remaining-string count is decremented; when it reaches   */
/* zero the heap node is marked EXPIRED.  Otherwise the slot's current */
/* pointer is moved past the end word (low two bits 00) of the string  */
/* it was pointing at.                                                 */
static void pqadvance(SICH * si, int htop)
{
    STEX * st = si->st;
    uint16_t slot = si->pq[htop];
    uint16_t * p;

    if (--st->stcnt[slot] == 0) {
        si->pq[htop] = EXPIRED;
        return;
    }
    p = st->ptp[slot];
    while ((*p & 3) != 0)
        p++;
    st->ptp[slot] = p + 1;      /* first word of the following string */
}
/* heapcomp - compare the current strings of heap nodes ha and hb.     */
/* An EXPIRED node sorts after everything: -1 is returned if hb is     */
/* expired (checked first), 1 if ha is expired.  Otherwise the result  */
/* is ZStrExtCompare of the two slots' current strings.                */
static int heapcomp(SICH * si, int ha, int hb)
{
    STEX * st = si->st;
    uint16_t sa,sb;

    if (si->pq[hb] == EXPIRED)
        return -1;
    if (si->pq[ha] == EXPIRED)
        return 1;
    sa = si->pq[ha];
    sb = si->pq[hb];
    return ZStrExtCompare((void*)st->ptp[sa], (void*)st->ptp[sb], 2);
}
/* v int 0-7 index spath. level of operation */
/* spath look them up in spath to get an h */
/* adjtop - called by merge() after the entry at heap node htop has    */
/* been set up or changed.  It walks the chain of preferred children   */
/* below htop (following ch[]), compares htop's entry against the      */
/* nodes on that chain, and swaps entries / flips ch[] as it goes.     */
/* When htop's string compares equal to another one, htop is stepped   */
/* forward with pqadvance and the whole procedure is repeated.         */
/* NOTE(review): presumably this restores the tournament ordering so   */
/* that node htop again holds the smallest string of its subtree, and  */
/* the pqadvance on equality is what drops duplicates - confirm.       */
static void adjtop(SICH * si, int htop)
{
int spath[8]; /* h = spath(v) */
int vlev;
int hcur,hpar,hsib; /* 1-255 heap points */
int r;
uint16_t temp;
vlev=0;
hcur=htop;
while(1) /* loop over all strings to insert */
{
/* populate the special path */
/* descend via ch[] until a leaf (node >= 128) or an EXPIRED node */
/* other than htop itself is reached; spath records each step     */
while(1)
{
spath[vlev]=hcur;
if(hcur>=128) break;
if( (si->pq[hcur]==EXPIRED) && (hcur!=htop) ) break;
hcur=2*hcur+si->ch[hcur];
vlev++;
}
/* climb back while htop's string is smaller (or hcur is EXPIRED); */
/* reaching level 0 means nothing on the path needs to move        */
while(1) /* find the correct place to put hcur */
{
if(vlev==0) return;
r = heapcomp(si,htop,hcur);
if(r!=-1) break;
vlev--;
hcur=spath[vlev];
}
/* r==1: htop's string is larger than hcur's, so exchange entries  */
/* level by level up the path.  At each level the sibling (hcur^1) */
/* is compared too: equal ends the loop, larger flips the parent's */
/* child selector.                                                 */
if(r==1) while(1) /* bump up */
{
if(vlev==0) return;
hpar=spath[vlev-1];
hsib=hcur^1;
r = heapcomp(si,htop,hsib);
if(r==0) break;
if(r==1) si->ch[hpar]^=1;
temp=si->pq[hcur];
si->pq[hcur]=si->pq[htop];
si->pq[htop]=temp;
vlev--;
hcur=spath[vlev];
}
/* only reached when a comparison returned 0 (equal strings):   */
/* step htop on to its next string and go round again           */
pqadvance(si, htop);
}
}
/* Return pointer to last string <= x */
/* Searches the sorted word list st->list (st->listw words long) for   */
/* the extracted string x, comparing with ZStrExtCompare.  Returns     */
/* NULL when the list is empty or when even the first string is        */
/* greater than x.  The search halves the word range [w1,w3] and then  */
/* realigns the midpoint to the start of a string by scanning for an   */
/* end word (low two bits 00).                                         */
/* NOTE(review): st->listw is only assigned in ZStrSTSort within this  */
/* file, so this presumably must not be called before a sort - confirm.*/
void * ZStrSTFind(STEX * st, void * x)
{
uint16_t *wx, *w3, *w1, *w2;
int i;
if(st->listw==0) return NULL; /* list is empty */
wx = (uint16_t *) x;
w1=st->list; /* very first word */
w3=w1+st->listw-2; /* just before last word */
i=ZStrExtCompare( (void*)w1, (void*)wx,2);
if(i>0) return NULL; /* first word bigger */
/* scan back from the word before the final end word to the previous */
/* end word (or to just before the list start)                       */
while(w3>=w1)
{
if(((*w3)&3)==0) break;
w3--;
}
w3++; /* first word of last string */
/* w1 and w3 point to the first and last candidate strings */
while(w1!=w3)
{
/* midpoint word, moved back to the start of the string holding it */
w2=w1+(w3-w1)/2;
while(w2>=w1)
{
if(((*w2)&3)==0) break;
w2--;
}
w2++;
if(w2==w1) /* no earlier start - try later */
{
/* midpoint lies inside the first string: move forward to the */
/* start of the next string instead                           */
w2=w1+(w3-w1)/2;
while(w2<w3)
{
if(((*w2)&3)==0) break;
w2++;
}
w2++;
if(w2>=w3) return w1;
}
/* keep the half that can still contain the last string <= x */
i=ZStrExtCompare( (void*)w2, (void*)wx,2);
if(i>0) w3=w2;
else w1=w2;
}
return w1;
}
/* merge - merge every slot of `layer` into one new slot of layer+1.   */
/* Layer L occupies slots 256*L .. 256*L+inuse[L]-1.  The slots are    */
/* combined through a 255-leaf heap (SICH) into the next free slot of  */
/* the layer above, which is (re)allocated if its buffer is too small. */
/* Strings that compare equal cause the heap to be advanced without    */
/* output (see adjtop / the heap-building loop below).                 */
/* The source layer is then marked empty; if the layer above reaches   */
/* 255 slots it is merged in turn.                                     */
/* Returns 0 on success (or if the layer is empty), 1 if the output    */
/* buffer cannot be allocated.                                         */
static int merge(STEX * st, int layer)
{
    uint16_t sfst,slst,snpl,ssc,i;
    uint16_t *wout, *w1;
    SICH si;
    size_t mem;
    int hcur,r;

    if (st->inuse[layer] == 0)
        return 0;
    si.st = st;
    sfst = 256*layer;
    slst = sfst + st->inuse[layer];               /* one more than last */
    snpl = 256*(layer+1) + st->inuse[layer+1];    /* new place */

    /* total the input sizes, rewind each input, and load the heap */
    hcur = 1;
    mem = 0;
    for (i = sfst; i < slst; i++) {
        mem += (st->ptp[i] - st->pst[i]) * sizeof(uint16_t);
        st->ptp[i] = st->pst[i];
        si.pq[hcur++] = i;
    }
    while (hcur < 256)
        si.pq[hcur++] = EXPIRED;

    if (mem > st->mal[snpl]) {
        if (st->mal[snpl] != 0)
            free(st->pst[snpl]);
        /* record that the slot owns nothing until malloc succeeds, */
        /* so a failure cannot leave a stale size paired with NULL  */
        st->mal[snpl] = 0;
        st->pst[snpl] = malloc(mem);
        if (st->pst[snpl] == NULL)
            return 1;
        st->mal[snpl] = mem;
    }
    st->stcnt[snpl] = 0;

    /* build the heap bottom-up: choose each node's child selector */
    hcur = 127;
    while (hcur >= 1) {
        r = 0;
        while (r == 0) {
            r = heapcomp(&si, 2*hcur, 2*hcur+1);
            if (r != 0)
                break;
            /* children equal: step the left one on and re-settle it */
            pqadvance(&si, 2*hcur);
            adjtop(&si, 2*hcur);    /* bugfix added */
        }
        if (r == -1)
            si.ch[hcur] = 0;
        else
            si.ch[hcur] = 1;
        adjtop(&si, hcur);
        hcur--;
    }

    /* repeatedly copy out the string at the root */
    wout = st->pst[snpl];
    while (si.pq[1] != EXPIRED) {
        ssc = si.pq[1];
        w1 = st->ptp[ssc];
        while (((*w1)&3) != 0)
            *(wout++) = *(w1++);
        *(wout++) = *(w1++);        /* the end word */
        st->ptp[ssc] = w1;
        st->stcnt[ssc]--;
        if (st->stcnt[ssc] == 0)
            si.pq[1] = EXPIRED;
        adjtop(&si, 1);
        st->stcnt[snpl]++;
    }
    st->ptp[snpl] = wout;
    st->inuse[layer] = 0;
    st->inuse[layer+1]++;
    if (st->inuse[layer+1] == 255)
        return merge(st, layer+1);
    return 0;
}
/* ZStrSTAppend - add the Z-string z to the sorter st.                 */
/* z is extracted (and thereby consumed) into the next free slot of    */
/* layer 0, whose buffer is (re)allocated if too small.  When layer 0  */
/* reaches 255 slots it is merged into layer 1.                        */
/* Returns 0 on success, non-zero on allocation failure.               */
int ZStrSTAppend(STEX * st, ZSTR * z)
{
    size_t len;
    int sno;

    len = ZStrMaxLen(z,2) * sizeof(uint16_t);
    sno = st->inuse[0];
    if (len > st->mal[sno]) {
        if (st->mal[sno] != 0)
            free(st->pst[sno]);
        /* record that the slot owns nothing until malloc succeeds, */
        /* so a failure cannot leave a stale size paired with NULL  */
        st->mal[sno] = 0;
        st->pst[sno] = malloc(len);
        if (st->pst[sno] == NULL)
            return 1;
        st->mal[sno] = len;
    }
    len = ZStrExtract(z, st->pst[sno], 2);  /* now a word count */
    st->ptp[sno] = st->pst[sno] + len;
    st->stcnt[sno] = 1;
    st->inuse[0]++;
    if (st->inuse[0] >= 255)
        return merge(st,0);
    return 0;
}
/* ZStrSTSort - finish sorting: merge layers upwards until a single    */
/* slot holds everything, then hand that slot's buffer over as the     */
/* sorted list (st->list, st->listw words, st->listm bytes allocated,  */
/* st->cnt strings).  Any previous list is freed.                      */
/* An empty sorter yields an empty list (listw == 0).                  */
/* Returns 0 on success, or the merge error code.                      */
int ZStrSTSort(STEX * st)
{
    int lev,lev2,mxlev,r;
    uint16_t sans;

    lev = 0;
    mxlev = 0;
    while (lev < 6) {
        /* check to find maximum level */
        for (lev2 = 0; lev2 < 6; lev2++)
            if (st->inuse[lev2] != 0)
                mxlev = lev2;
        if ((lev == mxlev) && (st->inuse[lev] == 1))
            break;
        r = merge(st,lev);
        if (r != 0)
            return r;
        lev++;
    }
    if (st->listm != 0)
        free(st->list);
    /* lev == 6 means the loop ran off the top without finding any    */
    /* data; inuse[] must not be indexed with it.                     */
    if (lev >= 6 || st->inuse[lev] == 0) {  /* nothing there at all! */
        st->list = NULL;
        st->listw = 0;
        st->listm = 0;
        st->cnt = 0;
        return 0;
    }
    sans = 256*lev;
    st->list = st->pst[sans];
    st->listw = st->ptp[sans] - st->pst[sans];
    st->listm = st->mal[sans];
    st->cnt = st->stcnt[sans];
    st->mal[sans] = 0;      /* buffer now belongs to st->list */
    return 0;
}
/* ZStrTuberCons - create a tuber of roughly `size` bytes.             */
/* The low three bits of options select the number of K-keys per       */
/* 64-bit word (1->8, 2->4, 3->2, 4->1); anything else prints a        */
/* message and exits with status 35.  The K-key count kmax is derived  */
/* from size and bumped to an odd number with no divisor in 3..46.     */
/* Every word starts with only its top bit set.                        */
/* Returns NULL if memory cannot be obtained.                          */
TUBER * ZStrTuberCons(size_t size, int options)
{
    TUBER * t;
    int i;

    t = malloc(sizeof(TUBER));
    if (t == NULL)
        return NULL;

    /* compute number of K-keys per word from options */
    switch (options & 7) {
    case 1: t->kperw = 8; break;
    case 2: t->kperw = 4; break;
    case 3: t->kperw = 2; break;
    case 4: t->kperw = 1; break;
    default:
        printf("Invalid options field in ZStrTuberCons\n");
        exit(35);
    }

    /* compute maximum K-key from suggested size */
    t->kmax = (size*t->kperw)/8;
    t->kmax++;
    if ((t->kmax%2) == 0)
        t->kmax++;
    do {
        t->kmax += 2;
        for (i = 3; i < 47 && (t->kmax%i) != 0; i++)
            ;
    } while (i != 47);      /* i == 47: no small divisor found */

    t->wct = (t->kmax + t->kperw - 1)/t->kperw;
    t->tiptop = t->wct*t->kperw;
    t->tub = malloc(8*t->wct);
    if (t->tub == NULL) {
        free(t);
        return NULL;
    }
    for (i = 0; i < t->wct; i++)
        t->tub[i] = 0x8000000000000000ll;

    /* header length field width and the matching multiplier */
    switch (t->kperw) {
    case 2:
        t->lenlen = 4;
        t->mult = 16;
        break;
    case 1:
        t->lenlen = 5;
        t->mult = 32;
        break;
    default:
        t->lenlen = 3;
        t->mult = 8;
        break;
    }

    t->freekey = t->kmax;
    t->freebit = (t->wct*63) - (t->kmax*(t->lenlen+1));
    t->fuses = 0;
    return t;
}
/* ZStrTuberDest - release a tuber created by ZStrTuberCons.           */
/* Passing NULL is a no-op, mirroring free().                          */
void ZStrTuberDest(TUBER * t)
{
    if (t == NULL)
        return;
    free(t->tub);
    free(t);
}
/* CuR - a bit cursor into a tuber.  Each word carries 63 payload bits */
/* as far as the cursor is concerned.                                  */
typedef struct
{
TUBER * tub; /* the tuber being read or written */
uint64_t curw; /* up on tub->tub */ /* i.e. index of the current word */
long curb; /* 0-62 */ /* payload bits of the current word already passed */
long hdrlen; /* length in bits of the header last read by gethdr */
} CuR;
/* copycur - copy the position (tuber, word, bit) of cursor c1 into    */
/* c2.  The hdrlen field of c2 is deliberately left untouched, exactly */
/* as the position is all that callers rely on.                        */
static void copycur(CuR * c1, CuR * c2)
{
    c2->curb = c1->curb;
    c2->curw = c1->curw;
    c2->tub = c1->tub;
}
/* getbits - read the next `bits` bits at the cursor and advance it.   */
/* Words are treated as 63 payload bits each: the top bit of a word is */
/* not part of the bit stream (the following word is shifted left by   */
/* one to discard it).  Reading wraps from the last word to word 0.    */
/* The result is returned right-aligned, masked to `bits` bits.        */
/* NOTE(review): the mask is (1<<bits)-1, so callers presumably never  */
/* ask for 64 bits; the largest request visible in this file is 63.    */
static uint64_t getbits(CuR * cur, long bits)
{
uint64_t got,got1;
uint64_t one;
long newbits;
uint64_t x;
TUBER * tub;
one=1;
tub=cur->tub;
if(bits+cur->curb < 63)
{
/* the request lies inside the current word */
got=tub->tub[cur->curw];
cur->curb+=bits;
got>>=(63-cur->curb);
}
else
{
/* the request reaches the end of the current word (or beyond) */
got=tub->tub[cur->curw];
newbits=bits+cur->curb-63; /* bits needed from the next word */
cur->curb=newbits;
cur->curw++;
if(cur->curw>=tub->wct) cur->curw=0;
got1=tub->tub[cur->curw];
got1<<=1; /* drop the next word's top (non-payload) bit */
/* bugfix shift of 64 not happening */
if(newbits!=0)
got=(got<<newbits)+(got1>>(64-newbits));
}
x = got&((one<<bits)-one);
return x;
}
/* skipbits - advance the cursor by `bits` bits without reading them.  */
/* Non-positive counts are ignored.  Words hold 63 payload bits and    */
/* the word index wraps round at the end of the tuber.                 */
static void skipbits(CuR * cur, long bits)
{
    TUBER * t;

    if (bits <= 0)
        return;
    t = cur->tub;

    cur->curw += (bits/63);
    cur->curb += (bits%63);
    if (cur->curb >= 63) {
        /* carried over into the following word */
        cur->curb -= 63;
        cur->curw++;
    }
    while (cur->curw >= t->wct)
        cur->curw -= t->wct;
}
/* putbits - write the low `bits` bits of data at the cursor and       */
/* advance it.  Bits already passed in the current word, and the top   */
/* bit of every word, are preserved.  A write that reaches the end of  */
/* the current word continues in the next one, wrapping to word 0.     */
/* NOTE(review): the shifts assume bits is at most 63, which matches   */
/* every caller visible in this file - confirm for external callers.   */
static void putbits(CuR * cur, uint64_t data, long bits)
{
TUBER * tub;
uint64_t x1,x2;
uint64_t one;
long newbits;
tub=cur->tub;
one=1;
x2=(one<<(63-cur->curb))-one; /* mask for ~old bits */
if(bits+cur->curb < 63)
{
/* the write lies inside the current word */
x1=(one<<(63-cur->curb-bits))-one;
x2=x1^x2; /* new bits mask */
x1=~x2; /* old bits mask */
x1=x1&tub->tub[cur->curw]; /*old bits (inc. top one) */
tub->tub[cur->curw]=x1+((data<<(63-cur->curb-bits))&x2);
cur->curb+=bits;
return;
}
/* the write fills the rest of the current word ... */
x1=~x2;
x1=x1&tub->tub[cur->curw]; /* old bits */
newbits=cur->curb+bits-63; /* bits left over for the next word */
tub->tub[cur->curw]=x1+(data>>(newbits));
cur->curw++;
if(cur->curw>=tub->wct) cur->curw=0;
cur->curb=newbits;
/* ... and the remaining newbits go to the top of the next word's payload */
x1=((one<<(63-newbits))-one)|0x8000000000000000; /* keep these */
x2=tub->tub[cur->curw]&x1;
tub->tub[cur->curw]=x2+((data<<(63-newbits))&(~x1));
return;
}
/* gethdr - read the string header at the cursor.                      */
/* The cursor is left just after the header and cur->hdrlen is set to  */
/* the header's length in bits.  Returns the number of data bits that  */
/* follow, or -1 for a header value of 1 and -2 for a header value of  */
/* 0 (callers treat -2 as "key not present" and -1 as an empty         */
/* string).  Longer lengths use extra leading 1 bits, each worth       */
/* t->mult.                                                            */
static long gethdr(CuR * cur)
{
    TUBER * t = cur->tub;
    uint64_t h;
    long dlen;

    h = getbits(cur, t->lenlen+1);
    cur->hdrlen = t->lenlen+1;
    if (h < 3) {
        dlen = h;
        return dlen - 2;
    }

    /* while the top header bit is set, fold it into the length */
    /* and pull in one more bit                                 */
    for (dlen = 0; (h >> t->lenlen) != 0; dlen += t->mult) {
        h -= t->mult;
        h = (h << 1) + getbits(cur,1);
        cur->hdrlen++;
    }
    dlen += h;
    return dlen - 2;
}
/* ZStrTuberStats - report how full the tuber is.                      */
/* stats[0] receives the largest of three percentages: fuses per word, */
/* bits used, and keys used.  stats[1] receives a size figure derived  */
/* from it, (8*wct)*(stats[0]+1)/50, but never less than 72 per fuse.  */
void ZStrTuberStats(TUBER * t, uint64_t * stats)
{
    uint64_t worst,cand;

    /* percentage of words that have been fused */
    worst = (t->fuses*100)/t->wct;

    /* percentage of payload bits in use */
    cand = (t->freebit*100)/( (t->wct*63)-(t->kmax*(t->lenlen+1)) );
    cand = 100-cand;
    if (cand > worst)
        worst = cand;

    /* percentage of keys in use */
    cand = (100*t->freekey)/t->kmax;
    cand = 100-cand;
    if (cand > worst)
        worst = cand;

    cand = ((t->wct*8)*(worst+1))/50;
    if (cand < 72*t->fuses)
        cand = 72*t->fuses;

    stats[0] = worst;
    stats[1] = cand;
}
/* BlK - a block: a run of consecutive tuber words (wrapping at the    */
/* end of the tuber) that ends at a word whose top bit is set.         */
/* Filled in by locate() and extended by blkfuse().                    */
typedef struct
{
TUBER * tub; /* the tuber the block belongs to */
uint64_t first; /* index of the block's first word */
uint64_t last; /* index of the block's last word (top bit set) */
uint64_t words; /* number of words in the block */
} BlK;
/* Set cur to point to the wanted string */
/* locate - find the block containing K-key kkey and position cur at   */
/* the header of that key's string.  blk receives the block's first    */
/* and last word and its word count.  A block ends at a word whose top */
/* bit is set, so the search runs backwards to the end of the previous */
/* block and forwards to the end of this one (wrapping round the       */
/* tuber), then steps over the strings of the earlier keys.            */
static void locate(TUBER * t, uint64_t kkey, BlK * blk, CuR * cur)
{
    uint64_t first,last,words,key;
    long dlen;

    last = kkey/t->kperw;
    words = 1;

    /* back up to the last word of the preceding block ... */
    first = (last > 0) ? last-1 : t->wct-1;
    while ((t->tub[first]>>63) == 0) {
        first = (first > 0) ? first-1 : t->wct-1;
        words++;
    }
    /* ... and step forward again onto this block's first word */
    if (++first >= t->wct)
        first = 0;

    /* run forward to the word that ends this block */
    while ((t->tub[last]>>63) == 0) {
        if (++last >= t->wct)
            last = 0;
        words++;
    }

    /* fill in the BlK structure with first, last and number of words */
    blk->tub = t;
    blk->first = first;
    blk->last = last;
    blk->words = words;

    /* set the CuR structure to point to the required string */
    cur->tub = t;
    cur->curw = first;
    cur->curb = 0;
    for (key = first*t->kperw; key != kkey; ) {
        dlen = gethdr(cur);
        if (dlen > 0)
            skipbits(cur,dlen);
        /* bugfixed in tuber - wraparound */
        if (++key == t->kperw*t->wct)
            key = 0;
    }
}
/* grabs specified number of kkeys from cur*/
/* returns number of free bits, or -1 if memory allocation occurs */
/* For each of the kkeys strings starting at cur, the header and the   */
/* data bits are copied verbatim onto the end of z, at most 63 bits at */
/* a time; cur advances past them.  "Memory allocation occurs" above   */
/* means ZStrBitsIn reported a failure.  The free-bit count is the     */
/* distance from the final cursor position to the end of word          */
/* blk->last; cur->curw is left equal to blk->last.                    */
long grabrest(CuR * cur, BlK * blk, uint64_t kkeys, ZSTR * z)
{
uint64_t i,b;
long j,k,freeb;
int r;
TUBER * t;
CuR cur1;
t = blk->tub;
for(i=0;i<kkeys;i++)
{
/* measure the header with a scratch cursor so cur itself stays put */
copycur(cur,&cur1);
j=gethdr(&cur1); /* data length (may be negative: no data) */
k=cur1.hdrlen; /* header length */
/* copy the header bits */
while(k>63)
{
b=getbits(cur,63);
r=ZStrBitsIn(b,63,z);
if(r!=0) return -1;
k-=63;
}
b=getbits(cur,k);
r=ZStrBitsIn(b,k,z);
if(r!=0) return -1;
/* copy the data bits */
while(j>63)
{
b=getbits(cur,63);
r=ZStrBitsIn(b,63,z);
if(r!=0) return -1;
j-=63;
}
if(j>0)
{
b=getbits(cur,j);
r=ZStrBitsIn(b,j,z);
if(r!=0) return -1;
};
}
/* bugfix page turn */
/* a cursor sitting at bit 0 has really just finished the previous */
/* word, so step back and count that word as full                  */
if(cur->curb==0)
{
cur->curb=63;
if(cur->curw!=0) cur->curw--;
else cur->curw = t->wct-1;
}
/* end of bugfix page turn */
freeb=63-cur->curb;
/* plus 63 for every further word up to the end of the block */
while(cur->curw!=blk->last)
{
freeb+=63;
cur->curw++;
if(cur->curw>=t->wct) cur->curw=0;
}
return freeb;
}
/* blkfuse - extend blk by the block that follows it.                  */
/* blk->last is advanced to the end word of the next block (wrapping   */
/* at the end of the tuber) and blk->words is increased to match.  cur */
/* is placed at the start of the absorbed block, and all of its        */
/* strings (kperw per word) are appended to z via grabrest.            */
/* Returns grabrest's result: the free bits gained, or -1 on           */
/* allocation failure.                                                 */
static long blkfuse(BlK * blk, CuR * cur, ZSTR * z)
{
    TUBER * t = blk->tub;
    uint64_t kkeys;

    /* step onto the first word of the following block */
    if (++blk->last >= t->wct)
        blk->last = 0;
    cur->curw = blk->last;
    cur->curb = 0;
    blk->words++;

    /* walk to that block's end word, counting its keys */
    for (kkeys = t->kperw; (t->tub[blk->last]>>63) == 0; kkeys += t->kperw) {
        if (++blk->last >= t->wct)
            blk->last = 0;
        blk->words++;
    }
    return grabrest(cur,blk,kkeys,z);
}
/* movebits - transfer `bits` bits from the front of z to the tuber at */
/* cur, 60 bits at a time.  A final transfer of the remainder (which   */
/* may be zero bits) is always made.                                   */
void movebits(ZSTR * z, long bits, CuR * cur)
{
    uint64_t chunk;
    long left;

    for (left = bits; left > 60; left -= 60) {
        chunk = ZStrBitsOut(z,60);
        putbits(cur,chunk,60);
    }
    chunk = ZStrBitsOut(z,left);
    putbits(cur,chunk,left);
}
/* ZStrTuberRead - read the string stored under K-key kkey into z.     */
/* Returns 1 if the key is not present (z untouched), 2 if z could not */
/* be grown, 0 on success.  An empty stored string leaves z cleared;   */
/* otherwise z receives the stored data bits followed by the final 1   */
/* bit that is not stored in the tuber.                                */
int ZStrTuberRead(TUBER * t, uint64_t kkey, ZSTR * z)
{
    BlK blk;
    CuR cur;
    long remaining,take;

    locate(t,kkey,&blk,&cur);
    remaining = gethdr(&cur);
    if (remaining == -2)
        return 1;
    ZStrClear(z);
    if (remaining == -1)
        return 0;

    /* copy the data across in pieces of at most 60 bits */
    while (remaining > 0) {
        take = (remaining > 60) ? 60 : remaining;
        if (ZStrBitsIn(getbits(&cur,take),take,z) != 0)
            return 2;
        remaining -= take;
    }
    if (ZStrBitsIn(1,1,z) != 0)
        return 2;
    return 0;
}
/* ZStrTuberIns - claim a K-key for the pair (d1,d2).                  */
/* Candidate keys ZStrTuberK(t,d1,d2,keyb) are tried for keyb = 0 ..   */
/* 65535 until one is found whose header reads "key not present".      */
/* That header is rewritten, at the same size, to the value 1 (an      */
/* empty string) and the free-key count is reduced.                    */
/* Returns the keyb that worked, or INSFAIL if none did.               */
uint64_t ZStrTuberIns(TUBER * t, uint64_t d1, uint64_t d2)
{
    BlK blk;
    CuR cur,slot;
    uint64_t kkey,keyb;
    int hdr;

    for (keyb = 0; keyb < 65536; keyb++) {
        kkey = ZStrTuberK(t,d1,d2,keyb);
        locate(t,kkey,&blk,&cur);
        copycur(&cur,&slot);    /* remember where the header starts */
        hdr = gethdr(&cur);
        if (hdr != -2)
            continue;           /* key already taken */
        /* equal size so change from key-not-found to zero */
        putbits(&slot,1,(cur.tub)->lenlen+1);
        t->freekey--;
        return keyb;
    }
    return INSFAIL;
}
/* ZStrTuberUpdate - replace the string stored under K-key kkey by z.  */
/* z is consumed.  If the new string occupies exactly as many bits as  */
/* the old one it is overwritten in place.  Otherwise the strings that */
/* follow it in the block are lifted out into a temporary Z-string,    */
/* neighbouring blocks are fused in while there is not enough room,    */
/* and then the new string, the lifted strings and zero padding are    */
/* written back.                                                       */
/* Returns 0 on success, 1 if temporary memory cannot be obtained, 2   */
/* if fusing fails or the block would exceed a third of the tuber.     */
/* The temporary Z-string is released on every path.  Note that on the */
/* error returns the free-bit and fuse counters have already been      */
/* adjusted, as before.                                                */
int ZStrTuberUpdate(TUBER * t, uint64_t kkey, ZSTR * z)
{
    BlK blk;
    CuR cur;
    CuR cur1;
    ZSTR * z1;
    long i1,i2,i3,j,k,b1,sparebits,bitlen,spb1;
    int i;
    uint64_t kkeys;
    uint64_t w,m1,m2;
    int fuseflag;

    locate(t,kkey,&blk,&cur);
    copycur(&cur,&cur1);
    i1 = gethdr(&cur1);
    if (i1 < 0)
        i1 = 0;
    i3 = i1;                /* old data length */
    i1 += cur1.hdrlen;      /* current total length in tuber */
    j = ZStrLen(z);
    k = j+1;
    b1 = 0;
    while (k >= (t->mult)) {
        b1++;
        k -= t->mult;
    }
    /* so b1 is the number of 1-bits in the header                 */
    /* and k is the value of the remainder of the header bits      */
    /* and j is the length of the z-string part (inc. last 1)      */
    i2 = b1 + j + t->lenlen;
    if (j == 0)
        i2++;
    /* so now i2 is the new length */
    if (i2 == i1) {         /* same length case */
        for (i = 0; i < b1; i++)
            putbits(&cur,1,1);
        putbits(&cur,0,1);
        putbits(&cur,k,t->lenlen);
        if (j > 1)
            movebits(z,j-1,&cur);
        return 0;
    }

    t->freebit -= i2;
    t->freebit += i1;
    skipbits(&cur1,i3);     /* cur1 now at the string after ours */
    /* number of keys after kkey in this block */
    kkeys = ((blk.last+1)*t->kperw) - 1;
    if (kkeys >= kkey)
        kkeys = kkeys - kkey;
    else
        kkeys = t->tiptop + kkeys - kkey;
    z1 = ZStrCons(kkeys/t->wct+7);  /* first shot */
    if (z1 == NULL)
        return 1;
    sparebits = grabrest(&cur1,&blk,kkeys,z1);
    if (sparebits == -1) {
        ZStrDest(z1);
        return 1;
    }

    fuseflag = 0;
    while (sparebits + i1 < i2) {
        t->fuses++;
        spb1 = blkfuse(&blk,&cur1,z1);
        if (spb1 == -1) {
            ZStrDest(z1);
            return 2;
        }
        sparebits += spb1;
        fuseflag = 1;
        if (blk.words > (t->wct/3)) {
            ZStrDest(z1);
            return 2;
        }
    }
    sparebits = sparebits + i1 - i2;

    if (fuseflag == 1) {
        /* blocks were joined: clear the end-of-block bit on every */
        /* word but the last, and set it on the last               */
        m1 = 0x7fffffffffffffffull;
        m2 = 0x8000000000000000ull;
        w = blk.first;
        while (w != blk.last) {
            t->tub[w] &= m1;
            w++;
            if (w >= t->wct)
                w = 0;
        }
        t->tub[w] |= m2;
    }

    /* write the new header and data, then the strings that followed */
    for (i = 0; i < b1; i++)
        putbits(&cur,1,1);
    putbits(&cur,0,1);
    putbits(&cur,k,t->lenlen);
    if (j > 1)
        movebits(z,j-1,&cur);
    bitlen = ZStrLen(z1);
    movebits(z1,bitlen,&cur);
    /* z1 is empty now, so this writes sparebits zero bits */
    ZStrClear(z1);
    movebits(z1,sparebits,&cur);
    ZStrDest(z1);
    return 0;
}
/* ZStrTuberDelete - remove K-key kkey from the tuber.                 */
/* The key's string is replaced by a "key not present" header (value   */
/* 0) and the strings that follow it in the block are moved up to      */
/* close the gap.                                                      */
/* Returns 0 on success, 1 if temporary memory cannot be obtained; on  */
/* failure the tuber and its counters are left unchanged.              */
/* grabrest reports failure as -1 and otherwise returns the block's    */
/* free-bit count, so only -1 is treated as an error here.  The        */
/* temporary Z-string is released on every path.                       */
int ZStrTuberDelete(TUBER * t, uint64_t kkey)
{
    BlK blk;
    CuR cur;
    CuR cur1;
    ZSTR * z;
    long i1,bitlen,hdrlen,rest;
    uint64_t kkeys;

    locate(t,kkey,&blk,&cur);
    copycur(&cur,&cur1);
    i1 = gethdr(&cur1);
    hdrlen = cur1.hdrlen;
    skipbits(&cur1,i1);     /* cur1 now at the string after ours */

    /* number of keys after kkey in this block */
    kkeys = ((blk.last+1)*t->kperw) - 1;
    if (kkeys >= kkey)
        kkeys = kkeys - kkey;
    else
        kkeys = t->tiptop + kkeys - kkey;

    z = ZStrCons(kkeys/t->wct+7);   /* about right */
    if (z == NULL)
        return 1;
    rest = grabrest(&cur1,&blk,kkeys,z);
    if (rest == -1) {
        ZStrDest(z);
        return 1;
    }

    bitlen = ZStrLen(z);    /* probably should compute in grabrest */
    putbits(&cur,0,t->lenlen+1);    /* put in key-not-present */
    movebits(z,bitlen,&cur);
    ZStrDest(z);

    /* accounting, applied only once the delete has succeeded */
    t->freekey++;
    t->freebit += hdrlen;
    t->freebit += i1;
    t->freebit -= t->lenlen;
    return 0;
}
/* end of zstr.c */