1
0
Fork 0

fulltext index, initial version by RP

This commit is contained in:
Jan Steemann 2012-11-30 12:13:03 +01:00
parent f07e2c8005
commit cdfbb76c83
8 changed files with 3516 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,51 @@
/* ftsindex.h - The Full Text Search header file */
/* R. A. Parker 6.6.2012 */
/* Opaque index handle: struct FTS_REAL_index is not defined in this */
/* header, so callers can only hold and pass pointers to it.         */
/* NOTE(review): this header uses uint64_t, uint8_t, size_t,         */
/* FTS_collection_id_t and FTS_document_id_t without including       */
/* anything itself; the test program includes <stdint.h> and         */
/* "avodoc.h" before it.  It also has no include guard.              */
typedef struct FTS_REAL_index FTS_index_t;
/* A query: _len words, with one local option value per word.        */
typedef struct
{
/* options for the whole query (the test program always passes 0)    */
uint64_t _globalOptions;
/* number of entries in _localOptions and in _texts                  */
size_t _len;
/* one FTS_MATCH_* value per word                                    */
uint64_t * _localOptions;
/* one pointer per word; the test program passes NUL-terminated      */
/* byte strings - presumably UTF-8, TODO confirm                     */
uint8_t * * _texts;
} FTS_query_t;
/* local Options value (one for each word) */
#define FTS_MATCH_COMPLETE 1
#define FTS_MATCH_PREFIX 2
#define FTS_MATCH_SUBSTRING 4
/* index option; the test program passes it as the "options"         */
/* argument of FTS_CreateIndex before issuing FTS_MATCH_SUBSTRING    */
/* queries                                                           */
#define FTS_INDEX_SUBSTRINGS 1
/* Query result: _len document ids stored in _docs.                  */
typedef struct
{
size_t _len;
FTS_document_id_t * _docs;
} FTS_document_ids_t;
/* Initialiser for the 10-element "sizes" array of FTS_CreateIndex.  */
/* NOTE(review): the meaning of the individual entries is not        */
/* visible in this header.                                           */
#define FTS_SIZES_DEFAULT {10,1000,57,100,0,0,0,0,0,0}
/* Create an index for collection "coll".  The test program treats   */
/* a NULL return as failure.                                         */
FTS_index_t * FTS_CreateIndex(FTS_collection_id_t coll,
uint64_t options, uint64_t sizes[10]);
/* Release an index obtained from FTS_CreateIndex.                   */
void FTS_FreeIndex ( FTS_index_t * ftx);
/* The test program calls this twice after adding documents and      */
/* before querying; what it does is not visible here.                */
void FTS_BackgroundTask (FTS_index_t * ftx);
/* Add / delete / update a document by id.  Only the id is passed;   */
/* presumably the words are fetched through FTS_GetTexts (declared   */
/* in avodoc.h) - TODO confirm against the implementation.           */
void FTS_AddDocument(FTS_index_t * ftx, FTS_document_id_t docid);
void FTS_DeleteDocument(FTS_index_t * ftx, FTS_document_id_t docid);
void FTS_UpdateDocument(FTS_index_t * ftx, FTS_document_id_t docid);
/* Run a query.  The test program passes every result to             */
/* FTS_Free_Documents when it has finished with it.                  */
FTS_document_ids_t * FTS_FindDocuments (FTS_index_t * ftx,
FTS_query_t * query);
void FTS_Free_Documents(FTS_document_ids_t *);
/* NOTE(review): looks like a debugging dump of the index (the only  */
/* calls in the test program are commented out) - confirm.           */
void indexd(FTS_index_t * ftx);
/* end of ftsindex.h */

View File

@ -0,0 +1,438 @@
/* avodoc.c - My imitation of Avocado */
/* R. A. Parker 26.11.2012 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "avodoc.h"
#include "FTS_index.h"
FTS_texts_t * cons()
{
FTS_texts_t * tx;
tx=malloc(sizeof(FTS_texts_t));
tx->_texts=malloc(10*sizeof(uint8_t *));
return tx;
}
/* Words making up the fake documents returned by FTS_GetTexts.      */
/* w8 is defined but not referenced in this file.                    */
uint8_t w1[]="trinket";
uint8_t w2[]="fred";
uint8_t w3[]="zebra";
uint8_t w4[]="aardvark";
uint8_t w5[]="freed";
uint8_t w6[]="fredp";
uint8_t w7[]="fredq";
uint8_t w8[]="fredr";
/* Query strings: main() uses wp with FTS_MATCH_PREFIX and wf with   */
/* FTS_MATCH_SUBSTRING.                                              */
uint8_t wp[]="fre";
uint8_t wf[]="red";
/*
 * freg - stand-in "free" callback stored in FTS_texts_t.free.
 *
 * It releases nothing: it only reports on stdout that it was called.
 * The argument is ignored.
 */
void freg(void * doc)
{
    (void)doc;
    fputs("tried to free the document!\n", stdout);
}
/*
 * FTS_GetTexts - return the word list of a (fake) document.
 *
 * Collections 1 and 2 hold the same five documents (ids 2, 3, 5, 8 and
 * 11).  For any other collection or document id the result is an empty
 * list (_len == 0) rather than a list with an indeterminate length.
 *
 * The returned object comes from cons() and has its free callback set
 * to freg.  The word pointers refer to the file-scope arrays w1..w7
 * and must not be freed.
 *
 * Returns NULL if cons() could not allocate the object.
 */
FTS_texts_t * FTS_GetTexts
    (FTS_collection_id_t colid, FTS_document_id_t docid)
{
    static uint8_t * const doc2[]  = {w1, w2, w3, w4, w5, w6, w1, w2, w7};
    static uint8_t * const doc3[]  = {w4, w4, w4, w4, w5, w6, w4};
    static uint8_t * const doc5[]  = {w1, w1, w3, w5, w5, w7, w7, w1};
    static uint8_t * const doc8[]  = {w1, w2, w3, w4, w1, w2, w3, w4, w1, w2};
    static uint8_t * const doc11[] = {w2, w3, w4, w4, w7, w4};
    static const struct
    {
        FTS_document_id_t id;
        size_t len;
        uint8_t * const * words;
    } docs[] =
    {
        {  2, sizeof doc2  / sizeof doc2[0],  doc2  },
        {  3, sizeof doc3  / sizeof doc3[0],  doc3  },
        {  5, sizeof doc5  / sizeof doc5[0],  doc5  },
        {  8, sizeof doc8  / sizeof doc8[0],  doc8  },
        { 11, sizeof doc11 / sizeof doc11[0], doc11 }
    };
    FTS_texts_t * tx;
    size_t d;
    size_t w;

    tx = cons();
    if (tx == NULL)
    {
        return NULL;
    }

    /* unknown documents are reported as empty */
    tx->_len = 0;

    if (colid == 1 || colid == 2)
    {
        for (d = 0; d < sizeof docs / sizeof docs[0]; d++)
        {
            if (docs[d].id != docid)
            {
                continue;
            }
            /* every table entry fits the 10 slots provided by cons() */
            for (w = 0; w < docs[d].len; w++)
            {
                tx->_texts[w] = docs[d].words[w];
            }
            tx->_len = docs[d].len;
            break;
        }
    }

    tx->free = freg;
    return tx;
}
int main(int argc, char ** argv)
{
long long x1;
int i;
int temp;
FTS_collection_id_t colid1;
FTS_document_id_t docid;
FTS_index_t * ftx, *ftx2;
FTS_query_t query;
FTS_document_ids_t * queryres;
uint64_t def[10]=FTS_SIZES_DEFAULT;
printf("Minature FTS-test program started\n");
query._localOptions = malloc(5*sizeof(uint64_t));
query._texts = malloc(5*sizeof(uint8_t *));
colid1=1;
ftx=FTS_CreateIndex(colid1,0,def);
if(ftx==NULL)
{
printf("Create returned NULL, so giving up\n");
return 1;
}
printf("Managed to create an index . . . so far so good\n");
docid=11;
FTS_AddDocument(ftx,docid);
printf("Added document 11\n");
docid=2;
FTS_AddDocument(ftx,docid);
printf("Added document 2\n");
docid=3;
FTS_AddDocument(ftx,docid);
printf("Added document 3\n");
docid=5;
FTS_AddDocument(ftx,docid);
printf("Added document 5\n");
docid=8;
FTS_AddDocument(ftx,docid);
printf("Added document 8\n");
FTS_BackgroundTask(ftx);
printf("Came out of background task\n");
FTS_BackgroundTask(ftx);
printf("Came out of background task again\n");
/* indexd(ftx); */
query._globalOptions = 0;
query._len = 1;
query._localOptions[0]=FTS_MATCH_COMPLETE; /* whole word */
query._texts[0] = w1;
queryres = FTS_FindDocuments(ftx,&query);
x1=queryres->_len;
printf("Resulted in %lld documents\n",x1);
for(i=0;i<x1;i++)
{
temp=queryres->_docs[i];
printf(" %d",temp);
}
printf("\n");
FTS_Free_Documents(queryres);
query._globalOptions = 0;
query._len = 2;
query._localOptions[0]=FTS_MATCH_COMPLETE; /* whole word */
query._texts[0] = w4;
query._localOptions[1]=FTS_MATCH_COMPLETE; /* whole word */
query._texts[1] = w2;
queryres = FTS_FindDocuments(ftx,&query);
x1=queryres->_len;
temp=x1;
printf("Resulted in %d documents\n",temp);
for(i=0;i<x1;i++)
{
temp=queryres->_docs[i];
printf(" %d",temp);
}
printf("\n");
FTS_Free_Documents(queryres);
docid=8;
FTS_DeleteDocument(ftx,docid);
printf("Deleted document 8\n");
/* first query */
query._globalOptions = 0;
query._len = 1;
query._localOptions[0]=FTS_MATCH_COMPLETE; /* whole word */
query._texts[0] = w1;
queryres = FTS_FindDocuments(ftx,&query);
x1=queryres->_len;
temp=x1;
printf("Resulted in %d documents\n",temp);
for(i=0;i<x1;i++)
{
temp=queryres->_docs[i];
printf(" %d",temp);
}
printf("\n");
FTS_Free_Documents(queryres);
/* second query */
query._globalOptions = 0;
query._len = 1;
query._localOptions[0]=FTS_MATCH_PREFIX; /* whole word */
query._texts[0] = wp;
queryres = FTS_FindDocuments(ftx,&query);
x1=queryres->_len;
temp=x1;
printf("Resulted in %d documents\n",temp);
for(i=0;i<x1;i++)
{
temp=queryres->_docs[i];
printf(" %d",temp);
}
printf("\n");
FTS_Free_Documents(queryres);
/* third query */
query._globalOptions = 0;
query._len = 2;
query._localOptions[0]=FTS_MATCH_COMPLETE;
query._localOptions[1]=FTS_MATCH_PREFIX; /* whole word */
query._texts[0] = w1;
query._texts[1] = wp;
queryres = FTS_FindDocuments(ftx,&query);
x1=queryres->_len;
temp=x1;
printf("Resulted in %d documents\n",temp);
for(i=0;i<x1;i++)
{
temp=queryres->_docs[i];
printf(" %d",temp);
}
printf("\n");
FTS_Free_Documents(queryres);
/* end of queries */
/* now create an index with partial words allowed */
colid1=2;
ftx2=FTS_CreateIndex(colid1,FTS_INDEX_SUBSTRINGS,def);
if(ftx2==NULL)
{
printf("Create returned NULL, so giving up\n");
return 1;
}
printf("Managed to create an index . . . so far so good\n");
docid=11;
FTS_AddDocument(ftx2,docid);
printf("Added document 11\n");
docid=2;
FTS_AddDocument(ftx2,docid);
printf("Added document 2\n");
docid=3;
FTS_AddDocument(ftx2,docid);
printf("Added document 3\n");
docid=5;
FTS_AddDocument(ftx2,docid);
printf("Added document 5\n");
docid=8;
FTS_AddDocument(ftx2,docid);
printf("Added document 8\n");
FTS_BackgroundTask(ftx2);
printf("Came out of background task\n");
FTS_BackgroundTask(ftx2);
printf("Came out of background task again\n");
/* indexd(ftx2); */
query._globalOptions = 0;
query._len = 1;
query._localOptions[0]=FTS_MATCH_COMPLETE; /* whole word */
query._texts[0] = w1;
queryres = FTS_FindDocuments(ftx2,&query);
x1=queryres->_len;
printf("Resulted in %lld documents\n",x1);
for(i=0;i<x1;i++)
{
temp=queryres->_docs[i];
printf(" %d",temp);
}
printf("\n");
FTS_Free_Documents(queryres);
query._globalOptions = 0;
query._len = 2;
query._localOptions[0]=FTS_MATCH_COMPLETE; /* whole word */
query._texts[0] = w4;
query._localOptions[1]=FTS_MATCH_COMPLETE; /* whole word */
query._texts[1] = w2;
queryres = FTS_FindDocuments(ftx2,&query);
x1=queryres->_len;
temp=x1;
printf("Resulted in %d documents\n",temp);
for(i=0;i<x1;i++)
{
temp=queryres->_docs[i];
printf(" %d",temp);
}
printf("\n");
FTS_Free_Documents(queryres);
docid=2;
FTS_DeleteDocument(ftx2,docid);
printf("Deleted document 2\n");
docid=8;
FTS_DeleteDocument(ftx2,docid);
printf("Deleted document 8\n");
/* first query */
query._globalOptions = 0;
query._len = 1;
query._localOptions[0]=FTS_MATCH_COMPLETE; /* whole word */
query._texts[0] = w1;
queryres = FTS_FindDocuments(ftx2,&query);
x1=queryres->_len;
temp=x1;
printf("Resulted in %d documents\n",temp);
for(i=0;i<x1;i++)
{
temp=queryres->_docs[i];
printf(" %d",temp);
}
printf("\n");
FTS_Free_Documents(queryres);
/* second query */
query._globalOptions = 0;
query._len = 1;
query._localOptions[0]=FTS_MATCH_PREFIX; /* whole word */
query._texts[0] = wp;
queryres = FTS_FindDocuments(ftx2,&query);
x1=queryres->_len;
temp=x1;
printf("Resulted in %d documents\n",temp);
for(i=0;i<x1;i++)
{
temp=queryres->_docs[i];
printf(" %d",temp);
}
printf("\n");
FTS_Free_Documents(queryres);
/* third query */
query._globalOptions = 0;
query._len = 2;
query._localOptions[0]=FTS_MATCH_COMPLETE;
query._localOptions[1]=FTS_MATCH_PREFIX; /* whole word */
query._texts[0] = w1;
query._texts[1] = wp;
queryres = FTS_FindDocuments(ftx2,&query);
x1=queryres->_len;
temp=x1;
printf("Resulted in %d documents\n",temp);
for(i=0;i<x1;i++)
{
temp=queryres->_docs[i];
printf(" %d",temp);
}
printf("\n");
FTS_Free_Documents(queryres);
query._globalOptions = 0;
query._len = 1;
query._localOptions[0]=FTS_MATCH_SUBSTRING; /* whole word */
query._texts[0] = wf;
queryres = FTS_FindDocuments(ftx2,&query);
x1=queryres->_len;
printf("Substring - Resulted in %lld documents\n",x1);
for(i=0;i<x1;i++)
{
temp=queryres->_docs[i];
printf(" %d",temp);
}
printf("\n");
FTS_Free_Documents(queryres);
/* end of queries */
FTS_FreeIndex(ftx2);
FTS_FreeIndex(ftx);
printf("First simple test completed - free'd the index again\n");
return 0;
}
/* end of avodoc.c */

View File

@ -0,0 +1,19 @@
/* avodoc.h - header file for FTS access to documents */
/* R. A. Parker 16.7.2012 */
/* Identifiers for a collection and for a document.                  */
/* NOTE(review): this header uses uint64_t, uint8_t and size_t       */
/* without including <stdint.h>/<stddef.h>; includers must do so     */
/* first.  It also has no include guard.                             */
typedef uint64_t FTS_collection_id_t;
typedef uint64_t FTS_document_id_t;
/* The words of one document.                                        */
typedef struct
{
/* number of valid entries in _texts                                 */
size_t _len;
/* one pointer per word                                              */
uint8_t * * _texts;
/* callback supplied by whoever built the object.                    */
/* NOTE(review): presumably the consumer calls it with this object   */
/* once it has finished with the words - confirm against the index   */
/* implementation.                                                   */
void (*free)(void *);
} FTS_texts_t;
/* Fetch the words of document "docid" in collection "colid".        */
FTS_texts_t * FTS_GetTexts
(FTS_collection_id_t colid, FTS_document_id_t docid);
/* end of avodoc.h */

View File

@ -0,0 +1,98 @@
/* zcode.c - the Z-string code and hash module */
/* R. A. Parker 13.11.2012 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "zstr.h"
/* Each ZCOD below is initialised in the member order declared in    */
/* zstr.h: { t (code type), s (segments), tmax (top to translate),   */
/* bits (that determine len), X (first of segment), C (code added),  */
/* L (length in bits), SG (segment for top bits), TX (translate      */
/* table), UX (untranslate table) }.                                 */
/* zcutf code for storing letters in words */
/* type 2, 16 segments, values up to 127 translated, 4 top bits      */
uint64_t zcutfX[]={0,1,2,3,4,5,6,7,8,9,10,12,16,24,88,65624};
uint64_t zcutfC[]={0x0,0x8,0x4,0xC,0x2,0x6,0xA,0xE,
0x1,0x3,0xA,0x1C,0x48,0x2C0,0xD0000,0xF00000000};
uint8_t zcutfL[]={4,4,4,4,4,4,4,4,4,4,5,6,7,10,20,36};
uint8_t zcutfS[]={0,8,4,9,2,10,5,11,1,12,6,13,3,14,7,15};
/* translate table, indexed by the untranslated value 0..127         */
/* (for example entry 0x65, 'e', holds 0x01)                         */
uint8_t zcutfTX[]={0x00,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,
0x3C,0x3D,0x3E,0x3F,0x40,0x41,0x42,0x43,
0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,
0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,
0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,
0x5C,0x5D,0x5E,0x5F,0x60,0x61,0x62,0x63,
0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,
0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0x73,
0x74,0x1D,0x2E,0x27,0x24,0x1B,0x29,0x2F,
0x22,0x1F,0x32,0x30,0x25,0x28,0x20,0x1E,
0x2C,0x31,0x23,0x21,0x1C,0x26,0x2D,0x2A,
0x33,0x2B,0x34,0x75,0x76,0x77,0x78,0x79,
0x7A,0x03,0x14,0x0D,0x0A,0x01,0x0F,0x15,
0x08,0x05,0x18,0x16,0x0B,0x0E,0x06,0x04,
0x12,0x17,0x09,0x07,0x02,0x0C,0x13,0x10,
0x19,0x11,0x1A,0x7B,0x7C,0x7D,0x7E,0x7F};
/* untranslate table, indexed by the translated value 0..127         */
/* (for example entry 0x01 holds 0x65, 'e')                          */
uint8_t zcutfUX[]={0x00,0x65,0x74,0x61,0x6F,0x69,0x6E,0x73,
0x68,0x72,0x64,0x6C,0x75,0x63,0x6D,0x66,
0x77,0x79,0x70,0x76,0x62,0x67,0x6B,0x71,
0x6A,0x78,0x7A,0x45,0x54,0x41,0x4F,0x49,
0x4E,0x53,0x48,0x52,0x44,0x4C,0x55,0x43,
0x4D,0x46,0x57,0x59,0x50,0x56,0x42,0x47,
0x4B,0x51,0x4A,0x58,0x5A,0x01,0x02,0x03,
0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,
0x0C,0x0D,0x0E,0x0F,0x10,0x11,0x12,0x13,
0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,
0x1C,0x1D,0x1E,0x1F,0x20,0x21,0x22,0x23,
0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,
0x2C,0x2D,0x2E,0x2F,0x30,0x31,0x32,0x33,
0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,
0x3C,0x3D,0x3E,0x3F,0x40,0x5B,0x5C,0x5D,
0x5E,0x5F,0x60,0x7B,0x7C,0x7D,0x7E,0x7F};
ZCOD zcutf = {2,16,127,4,zcutfX,zcutfC,zcutfL,zcutfS,zcutfTX,zcutfUX};
/* zcbky code for storing a B_KEY */
/* type 1, 7 segments, no translation, 4 top bits                    */
uint64_t zcbkyX[]={0,1,2,3,4,20,276};
uint64_t zcbkyC[]={0,4,6,10,0xB0,0xE00,0xF0000};
uint8_t zcbkyL[]={1,3,3,4,8,12,20};
uint8_t zcbkyS[]={0,0,0,0, 0,0,0,0, 1,1,3,4, 2,2,5,6};
ZCOD zcbky = {1,7,0,4,zcbkyX,zcbkyC,zcbkyL,zcbkyS,NULL,NULL};
/* zcdelt code for storing UTF-8 deltas */
/* type 3, 14 segments, no translation, 4 top bits                   */
uint64_t zcdeltX[]={0,1,2,3,4,5,6,7,8,10,14,22,86,65622};
uint64_t zcdeltC[]={0,0x4,0x4,0xC,0x2,0x6,0xA,0xE,0x6,0x14,
0x38,0x2C0, 0xD0000,0xF00000000};
uint8_t zcdeltL[]={3,3,4,4,4,4,4,4,5,6,7,10,20,36};
uint8_t zcdeltS[]={0,0,4,8,2,9,5,10,1,1,6,11,3,12,7,13};
ZCOD zcdelt = {3,14,0,4,zcdeltX,zcdeltC,zcdeltL,zcdeltS,NULL,NULL};
/* zcdoc code for storing document handle deltas */
/* type 3, 8 segments, no translation, 3 top bits                    */
uint64_t zcdocX[]={0,1,3,11,43,171,1195,1049771};
uint64_t zcdocC[]={0,0x8,0x10,0xC0,0x80,0xC00,0x500000,0x38000000000};
uint8_t zcdocL[]={3,4,6,8,10,13,23,42};
uint8_t zcdocS[]={0,4,2,5,1,6,3,7};
ZCOD zcdoc = {3,8,0,3,zcdocX,zcdocC,zcdocL,zcdocS,NULL,NULL};
/* zckk code for storing direct K-KEY values */
/* type 1, 4 segments, no translation, 2 top bits                    */
uint64_t zckkX[]={0,65536,1114112,17891328};
uint64_t zckkC[]={0,0x200000,0x1000000,0x18000000000};
uint8_t zckkL[]={18,22,26,41};
uint8_t zckkS[]={0,2,1,3};
ZCOD zckk = {1,4,0,2,zckkX,zckkC,zckkL,zckkS,NULL,NULL};
/* zcdh code for putting doc handles into a stex */
/* type 1, no translation, 2 top bits                                */
/* NOTE(review): the segment count given below is 4, but zcdhX,      */
/* zcdhC and zcdhL each hold only 3 entries and zcdhS only names     */
/* segments 0..2 - confirm against zstr.c whether 3 was intended.    */
uint64_t zcdhX[]={0,8192,134225920};
uint64_t zcdhC[]={0,0x10000000,0xC0000000000};
uint8_t zcdhL[]={14,29,44};
uint8_t zcdhS[]={0,0,1,2};
ZCOD zcdh = {1,4,0,2,zcdhX,zcdhC,zcdhL,zcdhS,NULL,NULL};
/*
 * ZStrTuberK - compute a K-key in the range 0 .. t->kmax-1.
 *
 * t      tuber; only its kmax member is read (it must be non-zero)
 * d1,d2  the two values the key is derived from
 * keyb   selects which of the candidate keys for (d1,d2) is wanted
 *
 * keyb is consumed one base-47 digit at a time, lowest digit first.
 * Each round hashes (d1,d2) to a primary key, perturbs it by the
 * current digit and, if higher digits remain, feeds the result back
 * in as d1 with d2 = 0.
 */
uint64_t ZStrTuberK(TUBER * t, uint64_t d1,
                    uint64_t d2, uint64_t keyb)
{
    const uint64_t modulus = t->kmax;
    uint64_t slot;
    uint64_t digit;

    for (;;)
    {
        /* primary key for the current (d1,d2) pair */
        if (d2 >= 3)
        {
            slot = (d1 * (d1 + d2) + 2 * d2 * d2) % modulus;
        }
        else
        {
            slot = (d1 + 5 * d2) % modulus;
        }

        /* perturb by the lowest base-47 digit of keyb */
        digit = keyb % 47;
        switch (digit)
        {
        case 0:
            break;
        case 1:
            slot = (slot + 19) % modulus;
            break;
        case 2:
            slot = (slot + 43) % modulus;
            break;
        default:
            slot = ((slot + 3) * digit) % modulus;
            break;
        }

        if (keyb < 47)
        {
            return slot;
        }

        /* more digits left: rehash the result with the next digit */
        d1 = slot;
        d2 = 0;
        keyb /= 47;
    }
}
/* end of zcode.c */

1235
arangod/FulltextIndex/zstr.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,107 @@
/* zstr.h - header file for the z-string module */
/* R. A. Parker 3.5.2012 */
/* ZSTR - a string of bits.                                          */
/* NOTE(review): this header uses uint64_t, uint8_t, uint16_t and    */
/* size_t without including <stdint.h>/<stddef.h>, and has no        */
/* include guard.                                                    */
typedef struct
{
/* data words; the regression dump prints dat[i] next to dlen[i]     */
/* for i = firstix .. lastix                                         */
uint64_t * dat;
/* length recorded for each entry of dat                             */
long * dlen;
/* the regression program expects this to equal the "elts" value     */
/* given to ZStrCons                                                 */
int alloc;
/* first and last index in use in dat / dlen                         */
int firstix;
int lastix;
} ZSTR;
/* Construct / destroy a ZSTR.                                       */
ZSTR * ZStrCons(int elts);
void ZStrDest(ZSTR * z);
/* Empty the string; the regression program expects length 0 after   */
/* this call.                                                        */
void ZStrClear(ZSTR * z);
/* Append the low "bits" bits of a to the end of the string.         */
void ZStrBitsIn(uint64_t a, long bits, ZSTR * z);
/* Remove and return the first "bits" bits of the string.            */
uint64_t ZStrBitsOut(ZSTR * z, long bits);
/* Return the first "bits" bits without removing them.  The          */
/* regression program expects bits beyond the end to read as zero.   */
uint64_t ZStrBitsPeek(ZSTR * z, long bits);
/* Length of the string in bits.                                     */
long ZStrLen(ZSTR * z);
/* NOTE(review): the regression program's expected values (16 -> 10  */
/* bits for 0x2FC0, 28 -> 25 bits) are consistent with this dropping */
/* trailing zero bits - confirm against zstr.c.                      */
void ZStrNormalize(ZSTR * z);
/* ZCOD - description of one variable-length code.  The code tables  */
/* in zcode.c are initialised in exactly this member order.          */
typedef struct
{
int t; /* code type */
int s; /* segments */
int tmax; /* Top to translate */
int bits; /* that determine len */
uint64_t * X; /* first of segment */
uint64_t * C; /* code added */
uint8_t * L; /* length in bits */
uint8_t * SG; /* segment for top bits */
uint8_t * TX; /* translate table */
uint8_t * UX; /* untranslate table */
} ZCOD;
/* Encode the value a with code zc and append it to z.               */
void ZStrEnc(ZSTR * z, ZCOD * zc, uint64_t a);
/* Decode and return the next value from the front of z.             */
uint64_t ZStrDec(ZSTR * z, ZCOD * zc);
/* Translate / untranslate a value through zc's TX / UX table.  The  */
/* regression program expects values above tmax to come back         */
/* unchanged.                                                        */
uint64_t ZStrXlate(ZCOD * zc, uint64_t a);
uint64_t ZStrUnXl(ZCOD * zc, uint64_t a);
/* Encode / decode a single value with no ZCOD argument; the         */
/* regression program only checks that LastDec returns what LastEnc  */
/* was given (0..9999).                                              */
void ZStrLastEnc(ZSTR * z, uint64_t a);
uint64_t ZStrLastDec(ZSTR * z);
/* CTX - state carried between successive ZStrCxEnc / ZStrCxDec      */
/* calls.                                                            */
/* NOTE(review): presumably x1 holds the previous value for the      */
/* delta (type 3) codes - confirm against zstr.c.                    */
typedef struct
{
uint64_t x1;
} CTX;
/* Reset ctx; the regression program calls this before the first     */
/* encode and again before the first decode of a delta-coded         */
/* sequence.                                                         */
void ZStrCxClear(ZCOD * zc, CTX * ctx);
/* Context-carrying variants of ZStrEnc / ZStrDec.                   */
void ZStrCxEnc(ZSTR * z, ZCOD * zc, CTX * ctx, uint64_t a);
uint64_t ZStrCxDec(ZSTR * z, ZCOD * zc, CTX * ctx);
/* External-format helpers.  "fmt" selects the format; the           */
/* regression program uses fmt 2 with a uint16_t buffer, where       */
/* MaxLen and Extract both yield 3 for a 32-bit string.              */
int ZStrMaxLen(ZSTR * z, int fmt);
/* Write z into the buffer x; returns the length written.            */
int ZStrExtract(ZSTR * z, void * x, int fmt);
/* Load z from a buffer previously produced by ZStrExtract.          */
void ZStrInsert(ZSTR * z, void * x, int fmt);
/* Compare two external-format strings / get the length of one.      */
int ZStrExtCompare(void * x, void * y, int fmt);
int ZStrExtLen(void * x, int fmt);
/* STEX - a collection of strings that can be appended to, sorted    */
/* and then searched.                                                */
typedef struct
{
uint16_t ** pst; /* 1281 pointers to start */
uint16_t ** ptp; /* 1281 pointers to top */
uint64_t * mal; /* 1281 number of bytes allocated */
uint64_t * stcnt; /* 1281 number of strings in clump */
uint16_t inuse[6];
uint16_t * list; /* final list */
uint64_t listw; /* number of uint16s in final list */
uint64_t listm; /* number of uint16's malloc'd */
uint64_t cnt; /* number of strings in list */
} STEX;
/* Construct / destroy a STEX; the regression program passes fmt 2.  */
STEX * ZStrSTCons(int fmt);
void ZStrSTDest(STEX * st);
/* Add the string z to the collection.                               */
void ZStrSTAppend(STEX * st, ZSTR * z);
/* Sort the collection; the regression program calls this before     */
/* ZStrSTFind.                                                       */
void ZStrSTSort(STEX * st);
/* Look up x (an external-format string).  The regression program    */
/* treats the result as a uint16_t pointer and allows for NULL.      */
void * ZStrSTFind(STEX * st, void * x);
/* TUBER - keyed storage for z-strings, addressed by K-key.          */
typedef struct
{
uint64_t kperw; /* K keys per word */
uint64_t kmax; /* (prime) number of keys */
uint64_t tiptop; /* number of spaces in tuber */
uint64_t wct; /* number of 64-bit words */
long lenlen; /* length of length string */
uint64_t mult; /* length bits per initial 1-bit */
uint64_t * tub; /* tuber data pointer */
} TUBER;
/* values for the "options" argument of ZStrTuberCons                */
#define TUBER_BITS_8 1
#define TUBER_BITS_16 2
#define TUBER_BITS_32 3
#define TUBER_BITS_64 4
/* Construct / destroy a tuber.                                      */
TUBER * ZStrTuberCons(size_t size, int options);
void ZStrTuberDest(TUBER * t);
void ZStrTuberStats(TUBER * t, int query, uint64_t * stats);
/* Read the string stored under kkey into z.  The regression         */
/* program expects 0 when the key holds data.                        */
int ZStrTuberRead(TUBER * t, uint64_t kkey, ZSTR * z);
/* Store z under kkey / remove the entry stored under kkey.          */
int ZStrTuberUpdate(TUBER * t, uint64_t kkey, ZSTR * z);
void ZStrTuberDelete(TUBER * t, uint64_t kkey);
/* NOTE(review): presumably the value ZStrTuberIns returns when no   */
/* slot can be found - confirm against zstr.c.                       */
#define INSFAIL 128000
/* Insert an entry for (d1,d2) and return its keyb.  The regression  */
/* program expects 0, 1, 2 for three inserts of the same (d1,d2).    */
uint64_t ZStrTuberIns(TUBER * t, uint64_t d1, uint64_t d2);
/* Map (d1,d2,keyb) to the K-key used by Read / Update / Delete.     */
uint64_t ZStrTuberK(TUBER * t, uint64_t d1,
uint64_t d2, uint64_t keyb);
/* end of zstr.h */

View File

@ -0,0 +1,488 @@
/* zstring regression program */
/* R. A. Parker 15.11.2012 */
#include <stdio.h>
#include <stdint.h>
#include "zstr.h"
/* Running count of failed checks. */
int err;

/*
 * ckint - compare an observed value with the expected one.
 *
 * x      number identifying the check
 * was    value observed
 * shdbe  value expected
 *
 * On a mismatch the error counter is bumped and a diagnostic line is
 * written to stdout; on a match nothing happens.
 */
void ckint(int x, int was, int shdbe)
{
    if (was != shdbe)
    {
        ++err;
        printf("Error %d, was %x (%d), should be %x\n", x, was, was, shdbe);
    }
}
/*
 * ZDUMP - debugging aid: print a ZSTR to stdout.
 *
 * Writes one header line with alloc, firstix and lastix, then one
 * line per index from firstix to lastix (inclusive) with the data
 * word in hex and the length recorded for it.
 */
void ZDUMP(ZSTR * z)
{
    int ix;

    printf("alloc %d firstix %d lastix %d\n",
           z->alloc, z->firstix, z->lastix);

    ix = z->firstix;
    while (ix <= z->lastix)
    {
        unsigned long long word = (unsigned long long)z->dat[ix];
        int bits = (int)z->dlen[ix];

        printf("ix %d, val %16llx length %d\n", ix, word, bits);
        ix++;
    }
}
/*
 * TUBDUMP - debugging aid: print a TUBER to stdout.
 *
 * Writes the six scalar fields on one line, followed by the t->wct
 * data words of t->tub in hex, five to a line.
 *
 * The 64-bit fields are printed at full width instead of being
 * narrowed to long/int first, so the dump stays correct on platforms
 * where long is 32 bits and for tubers with more than INT_MAX words.
 */
void TUBDUMP(TUBER * t)
{
    uint64_t i;

    printf("kperw %llu, kmax %llu, wct %llu, ",
           (unsigned long long)t->kperw,
           (unsigned long long)t->kmax,
           (unsigned long long)t->wct);
    printf("tiptop %llu, lenlen %ld, mult %llu\n",
           (unsigned long long)t->tiptop,
           t->lenlen,
           (unsigned long long)t->mult);

    for (i = 0; i < t->wct; i++)
    {
        printf("%16llx ", (unsigned long long)t->tub[i]);
        if (i % 5 == 4)
        {
            printf("\n");
        }
    }
    /* finish a partially filled last line */
    if ((t->wct % 5) != 0)
    {
        printf("\n");
    }
}
int main(int argc, char ** argv)
{
uint16_t y[10];
/* first test code 0xx 10xxx 11xxxx */
/* 0-3 4-11 12-27 */
uint64_t tx1[]={0,4,12,28};
uint64_t tc1[]={0,0x10,0x30};
uint8_t tl1[]={3,5,6};
uint8_t tsg1[]={0,0,1,2};
ZCOD zc1 = {1,3,0,2,tx1,tc1,tl1,tsg1,tl1,tl1};
/* second test code 0xx 10xxx 11xxxx */
/* 0-3 4-11 12-27 */
/* after translation 0 1 2 3 4 5 6 */
/* goes to 4 5 0 2 1 6 3 */
uint64_t tx2[]={0,4,12,28};
uint64_t tc2[]={0,0x10,0x30};
uint8_t tl2[]={3,5,6};
uint8_t tsg2[]={0,0,1,2};
uint8_t ttx2[]={4,5,0,2,1,6,3};
uint8_t tux2[]={2,4,3,6,0,1,5};
ZCOD zc2 = {2,3,6,2,tx2,tc2,tl2,tsg2,ttx2,tux2};
/* third test code 0xx 10xxx 11xxxx */
/* with delta 0-3 4-11 12-27 */
uint64_t tx3[]={0,4,12,28};
uint64_t tc3[]={0,0x10,0x30};
uint8_t tl3[]={3,5,6};
uint8_t tsg3[]={0,0,1,2};
ZCOD zc3 = {3,3,0,2,tx3,tc3,tl3,tsg3,tl3,tl3};
STEX * st;
uint16_t sw0[]={0x0000};
uint16_t sw2[]={0xFFFC};
TUBER * t1;
uint64_t stats[20];
ZSTR * z1;
CTX ctx;
uint64_t i,j,k;
uint64_t nokeys;
long len;
uint64_t d1,d2;
uint64_t b0,b1,b2,b3,b4,b5,b6,b7;
uint64_t k0,k1,k2,k3,k4,k5,k6,k7;
uint16_t * fw1;
int q;
err=0;
/* */
/* 001 - 020 First batch to just exercise the simple */
/* bit handling routines a little */
z1=ZStrCons(3);
ckint(1,z1->alloc,3); /* did it allocate OK */
len=ZStrLen(z1);
ckint(2,len,0); /* len=0 at start */
ZStrBitsIn(0x05A792,24,z1);
len=ZStrLen(z1);
ckint(3,len,24); /* len=24 now */
ZStrBitsIn(0xF,4,z1);
len=ZStrLen(z1);
ckint(4,len,28); /* len=28 now */
j=ZStrBitsPeek(z1,16);
ckint(5,j,0x05A7); /* first 16 bits */
len=ZStrLen(z1);
ckint(6,len,28); /* length the same */
j=ZStrBitsOut(z1,8);
ckint(7,j,0x05); /* first 8 bits */
len=ZStrLen(z1);
ckint(8,len,20); /* length 20 now */
j=ZStrBitsPeek(z1,28);
ckint(9,j,0xA792F00); /* first 28 bits! */
j=ZStrBitsOut(z1,12);
ckint(10,j,0xA79); /* last 12 bits */
len=ZStrLen(z1);
ckint(11,len,8); /* length 8 now */
ZStrBitsIn(0xC0,8,z1);
len=ZStrLen(z1);
ckint(12,len,16); /* length 16 */
j=ZStrBitsPeek(z1,16);
ckint(13,j,0x2FC0); /* 0x2FC0 -> Normalize */
ZStrNormalize(z1);
j=ZStrBitsPeek(z1,16);
ckint(14,j,0x2FC0); /* still 0x2FC0 */
len=ZStrLen(z1);
ckint(15,len,10); /* length 11 now */
ZStrClear(z1);
len=ZStrLen(z1);
ckint(16,len,0); /* length 0 now */
j=ZStrBitsPeek(z1,28);
ckint(17,j,0); /* last 28 bits all 0 */
j=ZStrBitsOut(z1,12);
ckint(18,j,0); /* last 12 bits all 0 */
ZStrDest(z1);
/* */
/* 0021 - 039 Next batch to test basic Enc/Decode */
/* */
/* test code 0xx 10xxx 11xxxx */
/* 0-3 4-11 12-27 (28+ illegal) */
z1=ZStrCons(3);
ZStrCxEnc(z1,&zc1,&ctx,3);
len=ZStrLen(z1);
ckint(21,len,3); /* length 3 now */
j=ZStrBitsPeek(z1,5);
ckint(22,j,0xC); /* 011 00 */
j=ZStrCxDec(z1,&zc1,&ctx);
ckint(23,j,3);
len=ZStrLen(z1);
ckint(24,len,0); /* length 0 now */
ZStrClear(z1);
ZStrCxEnc(z1,&zc1,&ctx,27); /* put in limit values */
ZStrCxEnc(z1,&zc1,&ctx,4);
ZStrCxEnc(z1,&zc1,&ctx,3);
ZStrCxEnc(z1,&zc1,&ctx,12);
ZStrCxEnc(z1,&zc1,&ctx,11);
ZStrCxEnc(z1,&zc1,&ctx,0);
len=ZStrLen(z1);
ckint(25,len,28); /* length should be 28 */
ZStrNormalize(z1);
len=ZStrLen(z1);
ckint(26,len,25); /* length should be 25 */
j=ZStrCxDec(z1,&zc1,&ctx);
ckint(27,j,27);
j=ZStrCxDec(z1,&zc1,&ctx);
ckint(28,j,4);
j=ZStrCxDec(z1,&zc1,&ctx);
ckint(29,j,3);
j=ZStrCxDec(z1,&zc1,&ctx);
ckint(30,j,12);
j=ZStrCxDec(z1,&zc1,&ctx);
ckint(31,j,11);
j=ZStrCxDec(z1,&zc1,&ctx);
ckint(32,j,0);
ZStrClear(z1);
j=0;
for(i=0;i<1000;i++)
{
j+=11;
if(j>27) j-=28;
ZStrCxEnc(z1,&zc1,&ctx,j);
}
ZStrNormalize(z1);
j=0;
for(i=0;i<1000;i++)
{
j+=11;
if(j>27) j-=28;
k=ZStrCxDec(z1,&zc1,&ctx);
ckint(33,k,j);
}
len=ZStrLen(z1);
ckint(34,len,0);
ZStrNormalize(z1);
ZStrDest(z1);
/* */
/* 0041 - 059 Next batch to test type 2 Enc/Decode */
/* second test code 0xx 10xxx 11xxxx */
/* 0-3 4-11 12-27 */
/* after translation 0 1 2 3 4 5 6 */
/* goes to 4 5 0 2 1 6 3 */
z1=ZStrCons(3);
ZStrCxEnc(z1,&zc2,&ctx,6);
len=ZStrLen(z1);
ckint(41,len,3); /* length 3 now */
j=ZStrBitsPeek(z1,5);
ckint(42,j,0xC); /* 011 00 */
j=ZStrCxDec(z1,&zc2,&ctx);
ckint(43,j,6);
len=ZStrLen(z1);
ckint(44,len,0); /* length 0 now */
ZStrClear(z1);
ZStrCxEnc(z1,&zc2,&ctx,27); /* put in limit values */
ZStrCxEnc(z1,&zc2,&ctx,0); /* 4 */
ZStrCxEnc(z1,&zc2,&ctx,6); /* 3 */
ZStrCxEnc(z1,&zc2,&ctx,12);
ZStrCxEnc(z1,&zc2,&ctx,11);
ZStrCxEnc(z1,&zc2,&ctx,2); /* 0 */
len=ZStrLen(z1);
ckint(45,len,28); /* length should be 28 */
ZStrNormalize(z1);
len=ZStrLen(z1);
ckint(46,len,25); /* length should be 25 */
j=ZStrCxDec(z1,&zc2,&ctx);
ckint(47,j,27);
j=ZStrCxDec(z1,&zc2,&ctx);
ckint(48,j,0);
j=ZStrCxDec(z1,&zc2,&ctx);
ckint(49,j,6);
j=ZStrCxDec(z1,&zc2,&ctx);
ckint(50,j,12);
j=ZStrCxDec(z1,&zc2,&ctx);
ckint(51,j,11);
j=ZStrCxDec(z1,&zc2,&ctx);
ckint(52,j,2);
ZStrClear(z1);
j=0;
for(i=0;i<1000;i++)
{
j+=11;
if(j>27) j-=28;
ZStrCxEnc(z1,&zc2,&ctx,j);
}
ZStrNormalize(z1);
j=0;
for(i=0;i<1000;i++)
{
j+=11;
if(j>27) j-=28;
k=ZStrCxDec(z1,&zc2,&ctx);
ckint(53,k,j);
}
len=ZStrLen(z1);
ckint(54,len,0);
ZStrNormalize(z1);
ZStrDest(z1);
/* */
/* 0060 - 079 Test Xlate and UnXl */
/* after translation 0 1 2 3 4 5 6 */
/* goes to 4 5 0 2 1 6 3 */
k=ZStrXlate(&zc2,0);
ckint(60,k,4);
k=ZStrXlate(&zc2,1);
ckint(61,k,5);
k=ZStrXlate(&zc2,2);
ckint(62,k,0);
k=ZStrXlate(&zc2,3);
ckint(63,k,2);
k=ZStrXlate(&zc2,4);
ckint(64,k,1);
k=ZStrXlate(&zc2,5);
ckint(65,k,6);
k=ZStrXlate(&zc2,6);
ckint(66,k,3);
k=ZStrXlate(&zc2,7);
ckint(67,k,7);
k=ZStrXlate(&zc2,17);
ckint(68,k,17);
k=ZStrXlate(&zc2,77777);
ckint(69,k,77777);
k=ZStrUnXl(&zc2,0);
ckint(70,k,2);
k=ZStrUnXl(&zc2,1);
ckint(71,k,4);
k=ZStrUnXl(&zc2,2);
ckint(72,k,3);
k=ZStrUnXl(&zc2,3);
ckint(73,k,6);
k=ZStrUnXl(&zc2,4);
ckint(74,k,0);
k=ZStrUnXl(&zc2,5);
ckint(75,k,1);
k=ZStrUnXl(&zc2,6);
ckint(76,k,5);
k=ZStrUnXl(&zc2,7);
ckint(77,k,7);
k=ZStrUnXl(&zc2,17);
ckint(78,k,17);
k=ZStrUnXl(&zc2,7777);
ckint(79,k,7777);
/* */
/* 0080 - 099 Test Enc/Decode of type 3 (delta) code */
/* */
/* test code 0xx 10xxx 11xxxx DELTA */
/* 0-3 4-11 12-27 (28+ illegal) */
z1=ZStrCons(3);
ZStrCxClear(&zc3,&ctx);
ZStrCxEnc(z1,&zc3,&ctx,3);
ZStrCxEnc(z1,&zc3,&ctx,5);
ZStrCxEnc(z1,&zc3,&ctx,9); /* 011 010 10000 */
len=ZStrLen(z1);
ckint(80,len,11); /* length 11 now */
j=ZStrBitsPeek(z1,10);
ckint(81,j,0x1A8); /* 011 010 1000 */
ZStrNormalize(z1);
j=ZStrBitsPeek(z1,10);
ckint(82,j,0x1A8); /* 011 010 1000 */
ZStrCxClear(&zc3,&ctx);
j=ZStrCxDec(z1,&zc3,&ctx);
ckint(83,j,3);
j=ZStrCxDec(z1,&zc3,&ctx);
ckint(84,j,5);
j=ZStrCxDec(z1,&zc3,&ctx);
ckint(85,j,9);
len=ZStrLen(z1);
ckint(86,len,0); /* length 0 now */
j=ZStrCxDec(z1,&zc3,&ctx);
ckint(87,j,9);
j=ZStrCxDec(z1,&zc3,&ctx);
ckint(88,j,9);
ZStrClear(z1);
ZStrCxClear(&zc3,&ctx);
j=0;
for(i=0;i<1000;i++)
{
j+=4;
ZStrCxEnc(z1,&zc3,&ctx,j);
}
ZStrNormalize(z1);
ZStrCxClear(&zc3,&ctx);
j=0;
for(i=0;i<1000;i++)
{
j+=4;
k=ZStrCxDec(z1,&zc3,&ctx);
ckint(89,k,j);
}
len=ZStrLen(z1);
ckint(90,len,0);
ZStrDest(z1);
/* */
/* 100 - 119 Test Extract, Insert and ExtLen */
z1=ZStrCons(3);
ZStrBitsIn(0xDEADBEEF,32,z1);
len=ZStrLen(z1);
ckint(100,len,32); /* len=32 now */
len=ZStrMaxLen(z1,2);
ckint(101,len,3);
len=ZStrExtract(z1,(void *)y,2);
ckint(102,len,3);
ckint(103,y[0],0xDEAE);
ckint(104,y[1],0xBEEF);
ckint(105,y[2],0x8000);
ZStrDest(z1);
z1=ZStrCons(5);
ZStrInsert(z1,(void *)y,2);
len=ZStrLen(z1);
ckint(106,len,32);
j=ZStrBitsOut(z1,32);
ckint(107,j,0xDEADBEEF);
ZStrDest(z1);
/* 200 - 299 - test the codes and hashes */
/* not yet written */
/* 300 - 399 test the tuber things */
z1=ZStrCons(3);
for(q=0;q<400;q+=100)
{
if(q==0) t1 = ZStrTuberCons(152,TUBER_BITS_8);
if(q==100) t1 = ZStrTuberCons(152,TUBER_BITS_16);
if(q==200) t1 = ZStrTuberCons(152,TUBER_BITS_32);
if(q==300) t1 = ZStrTuberCons(152,TUBER_BITS_64);
nokeys=t1->kmax;
d1=nokeys/2;
d2=0;
/* try inserting three items with same keya */
/* should get keybe as 0, 1 and 2 respectively. */
/* this relies on three inserts working with keyb*/
/* coming out as 0, 1 and 2. Change construction*/
/* size if this doesn't work. */
b0=ZStrTuberIns(t1,d1,d2);
ckint(300+q,b0,0);
b1=ZStrTuberIns(t1,d1,d2);
ckint(301+q,b1,1);
b2=ZStrTuberIns(t1,d1,d2);
ckint(302+q,b2,2);
k0=ZStrTuberK(t1,d1,d2,b0);
k1=ZStrTuberK(t1,d1,d2,b1);
k2=ZStrTuberK(t1,d1,d2,b2);
ZStrTuberDelete(t1,k0);
ZStrBitsIn(0xDEAD,16,z1);
ZStrNormalize(z1);
ZStrTuberUpdate(t1,k1,z1);
ZStrClear(z1);
j=ZStrTuberRead(t1,k1,z1);
ckint(303+q,j,0);
len=ZStrLen(z1);
ckint(304+q,len,16);
j=ZStrBitsOut(z1,16);
ckint(305+q,j,0xDEAD); /* get our data back */
ZStrTuberDest(t1);
}
ZStrDest(z1);
/* */
/* 700 - 799 STEX testing - sorting the words */
/* */
z1=ZStrCons(3);
st=ZStrSTCons(2);
ZStrBitsIn(0xDB,8,z1);
ZStrSTAppend(st,z1);
ZStrSTSort(st);
fw1=ZStrSTFind(st,(void*) sw0);
if(fw1!=NULL) ckint(704,*fw1,7777);
fw1=ZStrSTFind(st,(void*) sw2);
ckint(705,*fw1,0xDB00);
ZStrSTDest(st);
st=ZStrSTCons(2);
for(i=1;i<100;i++)
{
j=(17*i)%97;
ZStrClear(z1);
ZStrBitsIn(j,8,z1);
ZStrSTAppend(st,z1);
}
ZStrSTSort(st);
ZStrSTDest(st);
ZStrDest(z1);
/* */
/* 800 - 810 LastEnc and LastDec testing */
/* */
z1=ZStrCons(5);
for(i=0;i<10000;i++)
{
ZStrClear(z1);
ZStrLastEnc(z1,i);
j=ZStrLastDec(z1);
ckint(800,j,i);
}
ZStrDest(z1);
printf("End of z-string regression - %d errors\n",err);
return 0;
}
/* end of zstring regression module */