1
0
Fork 0

some speedups for JSON parsing, correction of Unicode escape sequence handling

This commit is contained in:
Jan Steemann 2012-07-05 09:31:56 +02:00
parent 1a43f1d54a
commit 829acdcbe9
6 changed files with 265 additions and 167 deletions

View File

@ -276,6 +276,22 @@ BOOST_AUTO_TEST_CASE (tst_json_string_utf8_2) {
FREE_BUFFER
}
////////////////////////////////////////////////////////////////////////////////
/// @brief test string value (unicode surrogate pair)
///
/// builds a JSON string from "a" followed by one character outside the Basic
/// Multilingual Plane and checks that the stringified form escapes that
/// character as a UTF-16 surrogate pair
////////////////////////////////////////////////////////////////////////////////
BOOST_AUTO_TEST_CASE (tst_json_string_utf8_3) {
// NOTE(review): INIT_BUFFER, STRINGIFY, STRING_VALUE, FREE_JSON and FREE_BUFFER
// are macros defined outside this hunk; presumably they set up a string buffer,
// stringify `json` into it, expose the result and release both -- confirm
INIT_BUFFER
// the second character is a single supplementary-plane code point
TRI_json_t* json = TRI_CreateStringCopyJson(TRI_UNKNOWN_MEM_ZONE, (char*) "a𝛢");
STRINGIFY
// \uD835\uDEE2 is the surrogate pair for U+1D6E2:
// 0x10000 + ((0xD835 - 0xD800) << 10) + (0xDEE2 - 0xDC00) = 0x1D6E2
BOOST_CHECK_EQUAL("\"a\\uD835\\uDEE2\"", STRING_VALUE);
FREE_JSON
FREE_BUFFER
}
////////////////////////////////////////////////////////////////////////////////
/// @brief test empty json list
////////////////////////////////////////////////////////////////////////////////

View File

@ -631,6 +631,22 @@ void TRI_Insert3ArrayJson (TRI_memory_zone_t* zone, TRI_json_t* object, char con
TRI_Free(zone, subobject);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief adds a new attribute, adopting the name buffer instead of copying it
///
/// The attribute key is built directly around the caller's `name` pointer, so
/// no duplicate of the name is allocated here. `nameLength` must be the length
/// of the string that `name` actually points to: the key's stored length is
/// `nameLength + 1` (presumably accounting for the terminating '\0' -- confirm
/// against the other TRI_JSON_STRING producers).
///
/// The key and the contents of `subobject` are appended to the object's
/// `_objects` vector, after which the `subobject` pointer itself is released
/// with TRI_Free; callers must not use `subobject` afterwards.
////////////////////////////////////////////////////////////////////////////////
void TRI_Insert4ArrayJson (TRI_memory_zone_t* zone, TRI_json_t* object, char* name, size_t nameLength, TRI_json_t* subobject) {
  TRI_json_t key;

  // wrap the caller-provided buffer as a JSON string, without duplicating it
  key._value._string.data = name;
  key._value._string.length = nameLength + 1;
  key._type = TRI_JSON_STRING;

  // an attribute occupies two consecutive vector slots: key first, then value
  // NOTE(review): TRI_PushBackVector presumably copies the element by value,
  // which is why the subobject shell can be freed right below -- confirm
  TRI_PushBackVector(&object->_value._objects, &key);
  TRI_PushBackVector(&object->_value._objects, subobject);

  // release only the pointer that was handed in
  TRI_Free(zone, subobject);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief looks up an attribute in a JSON array
////////////////////////////////////////////////////////////////////////////////

View File

@ -240,6 +240,14 @@ void TRI_Insert2ArrayJson (TRI_memory_zone_t*, TRI_json_t* object, char const* n
void TRI_Insert3ArrayJson (TRI_memory_zone_t*, TRI_json_t* object, char const* name, TRI_json_t* subobject);
////////////////////////////////////////////////////////////////////////////////
/// @brief adds a new attribute, not copying the name but freeing the subobject
/// pointer
///
/// the JSON struct created takes ownership of the name* passed: the pointer is
/// stored as the attribute key without being duplicated
///
/// nameLength must be the length of the string stored at name; the key's
/// length field is set to nameLength + 1
///
/// the subobject is appended after the key and the subobject pointer is then
/// released with TRI_Free using the given zone, so it must not be used by the
/// caller afterwards
////////////////////////////////////////////////////////////////////////////////
void TRI_Insert4ArrayJson (TRI_memory_zone_t* zone, TRI_json_t* object, char* name, size_t nameLength, TRI_json_t* subobject);
////////////////////////////////////////////////////////////////////////////////
/// @brief looks up an attribute in a JSON array
////////////////////////////////////////////////////////////////////////////////

View File

@ -1135,10 +1135,6 @@ char* TRI_UnescapeUtf8StringZ (TRI_memory_zone_t* zone, char const* in, size_t i
char * qtr;
char const * ptr;
char const * end;
char c1;
char c2;
char c3;
char c4;
buffer = TRI_Allocate(zone, inLength + 1, false);
@ -1153,24 +1149,12 @@ char* TRI_UnescapeUtf8StringZ (TRI_memory_zone_t* zone, char const* in, size_t i
++ptr;
switch (*ptr) {
case '/':
*qtr = '/';
break;
case '\\':
*qtr = '\\';
break;
case '"':
*qtr = '"';
break;
case 'b':
*qtr = '\b';
break;
case 'f':
*qtr = 'f';
*qtr = '\f';
break;
case 'n':
@ -1186,24 +1170,31 @@ char* TRI_UnescapeUtf8StringZ (TRI_memory_zone_t* zone, char const* in, size_t i
break;
case 'u':
// expecting at least 6 characters: \uXXXX
if (ptr + 4 < end) {
c1 = ptr[1];
c2 = ptr[2];
// check, if we have a surrogate pair
if (ptr + 10 < end) {
bool sp;
c3 = ptr[7];
c4 = ptr[8];
char c1 = ptr[1];
sp = (c1 == 'd' || c1 == 'D');
sp &= (c2 == '8' || c2 == '9' || c2 == 'A' || c2 == 'a' || c2 == 'B' || c2 == 'b');
sp &= (ptr[5] == '\\' || ptr[6] == 'u');
sp &= (c3 == 'd' || c3 == 'D');
sp &= (c4 == 'C' || c4 == 'c' || c4 == 'D' || c4 == 'd' || c4 == 'E' || c4 == 'e' || c4 == 'F' || c4 == 'f');
if (sp) {
char c2 = ptr[2];
sp &= (c2 == '8' || c2 == '9' || c2 == 'A' || c2 == 'a' || c2 == 'B' || c2 == 'b');
}
if (sp) {
char c3 = ptr[7];
sp &= (ptr[5] == '\\' && ptr[6] == 'u');
sp &= (c3 == 'd' || c3 == 'D');
}
if (sp) {
char c4 = ptr[8];
sp &= (c4 == 'C' || c4 == 'c' || c4 == 'D' || c4 == 'd' || c4 == 'E' || c4 == 'e' || c4 == 'F' || c4 == 'f');
}
if (sp) {
DecodeSurrogatePair(&qtr, ptr + 1, ptr + 7);
@ -1219,35 +1210,43 @@ char* TRI_UnescapeUtf8StringZ (TRI_memory_zone_t* zone, char const* in, size_t i
ptr += 4;
}
}
// ignore wrong format
else {
*qtr = *ptr;
}
break;
default:
// this includes cases \/, \\, and \"
*qtr = *ptr;
break;
}
}
else {
*qtr = *ptr;
}
*qtr = *ptr;
}
*qtr = '\0';
*outLength = qtr - buffer;
qtr = TRI_Allocate(zone, *outLength + 1, false);
// we might have wasted some space if the unescaped string is shorter than the
// escaped one. this is the case if the string contained escaped characters
if (*outLength < (ptr - in)) {
// result string is shorter than original string
qtr = TRI_Allocate(zone, *outLength + 1, false);
if (qtr != NULL) {
memcpy(qtr, buffer, *outLength + 1);
TRI_Free(zone, buffer);
if (qtr != NULL) {
memcpy(qtr, buffer, *outLength + 1);
return qtr;
}
// intentional fall-through
}
TRI_Free(TRI_CORE_MEM_ZONE, buffer);
return qtr;
return buffer;
}
////////////////////////////////////////////////////////////////////////////////

View File

@ -391,8 +391,8 @@ static void yy_fatal_error (yyconst char msg[] ,yyscan_t yyscanner );
*yy_cp = '\0'; \
yyg->yy_c_buf_p = yy_cp;
#define YY_NUM_RULES 14
#define YY_END_OF_BUFFER 15
#define YY_NUM_RULES 15
#define YY_END_OF_BUFFER 16
/* This struct is not used in this scanner,
but its presence is necessary. */
struct yy_trans_info
@ -400,13 +400,13 @@ struct yy_trans_info
flex_int32_t yy_verify;
flex_int32_t yy_nxt;
};
static yyconst flex_int16_t yy_accept[43] =
static yyconst flex_int16_t yy_accept[45] =
{ 0,
12, 12, 15, 13, 12, 12, 13, 13, 10, 5,
5, 11, 13, 13, 13, 8, 9, 6, 7, 12,
0, 4, 0, 5, 5, 0, 5, 0, 0, 0,
5, 0, 0, 0, 0, 0, 2, 3, 0, 5,
1, 0
13, 13, 16, 14, 13, 13, 14, 14, 11, 6,
6, 12, 14, 14, 14, 9, 10, 7, 8, 13,
0, 0, 4, 0, 6, 6, 0, 6, 0, 0,
0, 5, 6, 0, 0, 0, 0, 0, 2, 3,
0, 6, 1, 0
} ;
static yyconst flex_int32_t yy_ec[256] =
@ -414,17 +414,17 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
1, 1, 2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 1, 4, 1, 1, 1, 1, 1, 1,
1, 1, 5, 6, 7, 8, 1, 9, 10, 10,
10, 10, 10, 10, 10, 10, 10, 11, 1, 1,
1, 1, 1, 1, 12, 1, 1, 1, 13, 14,
1, 1, 1, 1, 1, 15, 1, 16, 1, 1,
1, 17, 18, 19, 20, 1, 1, 1, 1, 1,
21, 22, 23, 1, 1, 1, 24, 1, 1, 1,
1, 4, 5, 6, 5, 5, 5, 5, 5, 5,
5, 5, 7, 8, 9, 10, 5, 11, 12, 12,
12, 12, 12, 12, 12, 12, 12, 13, 5, 5,
5, 5, 5, 5, 14, 5, 5, 5, 15, 16,
5, 5, 5, 5, 5, 17, 5, 18, 5, 5,
5, 19, 20, 21, 22, 5, 5, 5, 5, 5,
23, 24, 25, 5, 5, 5, 26, 5, 5, 5,
25, 26, 1, 1, 1, 1, 1, 27, 1, 28,
1, 1, 1, 29, 30, 31, 32, 1, 1, 1,
1, 1, 33, 1, 34, 1, 1, 1, 1, 1,
27, 28, 5, 5, 5, 5, 5, 29, 5, 30,
5, 5, 5, 31, 32, 33, 34, 5, 5, 5,
1, 1, 35, 1, 36, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -441,66 +441,72 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 1, 1, 1
} ;
static yyconst flex_int32_t yy_meta[35] =
static yyconst flex_int32_t yy_meta[37] =
{ 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1
1, 1, 1, 1, 1, 1
} ;
static yyconst flex_int16_t yy_base[44] =
static yyconst flex_int16_t yy_base[47] =
{ 0,
0, 0, 94, 95, 33, 35, 35, 31, 95, 85,
34, 95, 34, 27, 31, 95, 95, 95, 95, 47,
41, 95, 82, 75, 43, 45, 56, 41, 46, 42,
62, 49, 54, 57, 79, 65, 95, 95, 67, 82,
95, 95, 79
0, 0, 110, 115, 35, 38, 42, 35, 115, 99,
39, 115, 38, 31, 36, 115, 115, 115, 115, 54,
48, 78, 115, 104, 96, 49, 51, 58, 54, 56,
52, 115, 76, 62, 72, 77, 86, 81, 115, 115,
64, 88, 115, 115, 104, 89
} ;
static yyconst flex_int16_t yy_def[44] =
static yyconst flex_int16_t yy_def[47] =
{ 0,
42, 1, 42, 42, 42, 42, 43, 42, 42, 42,
42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
43, 42, 43, 42, 42, 42, 42, 42, 42, 42,
42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
42, 0, 42
44, 1, 44, 44, 44, 44, 45, 44, 44, 44,
44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
46, 45, 44, 46, 44, 44, 44, 44, 44, 44,
44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
44, 44, 44, 0, 44, 44
} ;
static yyconst flex_int16_t yy_nxt[130] =
static yyconst flex_int16_t yy_nxt[152] =
{ 0,
4, 5, 6, 7, 8, 9, 8, 4, 10, 11,
12, 4, 4, 13, 4, 14, 4, 4, 15, 4,
16, 4, 17, 4, 4, 13, 4, 14, 4, 4,
15, 4, 18, 19, 20, 20, 20, 20, 22, 24,
25, 26, 27, 27, 22, 28, 29, 30, 20, 20,
26, 27, 27, 31, 31, 32, 23, 28, 29, 30,
33, 34, 23, 26, 27, 27, 36, 32, 37, 38,
31, 31, 33, 34, 35, 40, 40, 41, 36, 21,
37, 38, 26, 39, 42, 39, 35, 40, 40, 41,
40, 40, 26, 42, 3, 42, 42, 42, 42, 42,
4, 5, 6, 5, 4, 7, 8, 9, 8, 4,
10, 11, 12, 4, 4, 13, 4, 14, 4, 4,
15, 4, 16, 4, 17, 4, 4, 13, 4, 14,
4, 4, 15, 4, 18, 19, 20, 20, 20, 20,
20, 20, 21, 21, 21, 25, 26, 23, 27, 28,
28, 29, 30, 32, 31, 20, 20, 20, 27, 28,
28, 33, 33, 29, 30, 24, 31, 27, 28, 28,
34, 24, 35, 36, 42, 42, 21, 21, 21, 21,
21, 38, 34, 23, 35, 36, 33, 33, 39, 21,
37, 40, 41, 38, 41, 43, 42, 42, 42, 42,
42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
42, 42, 42, 42, 42, 42, 42, 42, 42
39, 24, 37, 40, 22, 27, 44, 43, 27, 44,
44, 44, 21, 21, 3, 44, 44, 44, 44, 44,
44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
44
} ;
static yyconst flex_int16_t yy_chk[130] =
static yyconst flex_int16_t yy_chk[152] =
{ 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 5, 5, 6, 6, 7, 8,
8, 11, 11, 11, 21, 13, 14, 15, 20, 20,
25, 25, 25, 26, 26, 28, 7, 13, 14, 15,
29, 30, 21, 27, 27, 27, 32, 28, 33, 34,
31, 31, 29, 30, 31, 39, 39, 36, 32, 43,
33, 34, 24, 35, 23, 35, 31, 35, 35, 36,
40, 40, 10, 3, 42, 42, 42, 42, 42, 42,
1, 1, 1, 1, 1, 1, 5, 5, 5, 6,
6, 6, 7, 7, 7, 8, 8, 7, 11, 11,
11, 13, 14, 21, 15, 20, 20, 20, 26, 26,
26, 27, 27, 13, 14, 7, 15, 28, 28, 28,
29, 21, 30, 31, 41, 41, 7, 7, 22, 22,
22, 34, 29, 22, 30, 31, 33, 33, 35, 46,
33, 36, 37, 34, 37, 38, 37, 37, 42, 42,
42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
42, 42, 42, 42, 42, 42, 42, 42, 42
35, 22, 33, 36, 45, 25, 24, 38, 10, 3,
0, 0, 22, 22, 44, 44, 44, 44, 44, 44,
44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
44
} ;
/* The intent behind this definition is that it'll catch
@ -524,6 +530,7 @@ static yyconst flex_int16_t yy_chk[130] =
#define COMMA 10
#define COLON 11
#define UNQUOTED_STRING 12
#define STRING_CONSTANT_ASCII 13
struct jsonData {
char const* _message;
@ -803,13 +810,13 @@ yy_match:
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
if ( yy_current_state >= 43 )
if ( yy_current_state >= 45 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
++yy_cp;
}
while ( yy_current_state != 42 );
while ( yy_current_state != 44 );
yy_cp = yyg->yy_last_accepting_cpos;
yy_current_state = yyg->yy_last_accepting_state;
@ -851,7 +858,16 @@ YY_RULE_SETUP
* strings
* ----------------------------------------------------------------------------- */
case 4:
/* rule 4 can match eol */
YY_RULE_SETUP
{
// performance optimisation for all-ASCII strings without escape characters
// this matches the ASCII chars with ordinal numbers 35 (x23) to 127 (x7f),
// plus space (32) and ! (33) but no quotation marks (34, x22) and backslashes (92, x5c)
return STRING_CONSTANT_ASCII;
}
YY_BREAK
case 5:
/* rule 5 can match eol */
YY_RULE_SETUP
{
return STRING_CONSTANT;
@ -860,7 +876,7 @@ YY_RULE_SETUP
/* -----------------------------------------------------------------------------
* numbers
* ----------------------------------------------------------------------------- */
case 5:
case 6:
YY_RULE_SETUP
{
return NUMBER_CONSTANT;
@ -869,37 +885,37 @@ YY_RULE_SETUP
/* -----------------------------------------------------------------------------
* special characters
* ----------------------------------------------------------------------------- */
case 6:
case 7:
YY_RULE_SETUP
{
return OPEN_BRACE;
}
YY_BREAK
case 7:
case 8:
YY_RULE_SETUP
{
return CLOSE_BRACE;
}
YY_BREAK
case 8:
case 9:
YY_RULE_SETUP
{
return OPEN_BRACKET;
}
YY_BREAK
case 9:
case 10:
YY_RULE_SETUP
{
return CLOSE_BRACKET;
}
YY_BREAK
case 10:
case 11:
YY_RULE_SETUP
{
return COMMA;
}
YY_BREAK
case 11:
case 12:
YY_RULE_SETUP
{
return COLON;
@ -909,19 +925,19 @@ YY_RULE_SETUP
* Skip whitespaces. Whatever is left, should be an unquoted string appearing
* somewhere. This will be reported as an error.
* ----------------------------------------------------------------------------- */
case 12:
/* rule 12 can match eol */
case 13:
/* rule 13 can match eol */
YY_RULE_SETUP
{
}
YY_BREAK
case 13:
case 14:
YY_RULE_SETUP
{
return UNQUOTED_STRING;
}
YY_BREAK
case 14:
case 15:
YY_RULE_SETUP
ECHO;
YY_BREAK
@ -1219,7 +1235,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
if ( yy_current_state >= 43 )
if ( yy_current_state >= 45 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
@ -1248,11 +1264,11 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
if ( yy_current_state >= 43 )
if ( yy_current_state >= 45 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
yy_is_jam = (yy_current_state == 42);
yy_is_jam = (yy_current_state == 44);
return yy_is_jam ? 0 : yy_current_state;
}
@ -2130,10 +2146,8 @@ static TRI_json_t* ParseArray (yyscan_t scanner) {
TRI_json_t* sub;
bool comma;
char* name;
char const* ptr;
size_t nameLen;
int c;
size_t len;
size_t outLength;
comma = false;
array = TRI_CreateArrayJson(yyextra._memoryZone);
@ -2163,16 +2177,29 @@ static TRI_json_t* ParseArray (yyscan_t scanner) {
}
// attribute name
if (c != STRING_CONSTANT) {
if (c == STRING_CONSTANT) {
  // "complex" attribute name: the token may contain escape sequences
  // initialised so that nameLen is well-defined even if unescaping fails
  size_t outLength = 0;

  // do proper unescaping of the text between the enclosing quotes
  // (yytext + 1 and yyleng - 2 skip the opening and closing quotation mark)
  name = TRI_UnescapeUtf8StringZ(yyextra._memoryZone, yytext + 1, yyleng - 2, &outLength);

  // unescaping can shrink the string (e.g. \" becomes "), so the length that
  // is later handed to TRI_Insert4ArrayJson must be the unescaped length.
  // passing the raw token length would make the stored string length larger
  // than the buffer that name points to
  nameLen = outLength;
}
else if (c == STRING_CONSTANT_ASCII) {
// ASCII-only attribute name
nameLen = yyleng - 2;
// no unescaping necessary. just copy it
name = TRI_DuplicateString2Z(yyextra._memoryZone, yytext + 1, nameLen);
}
else {
// some other token found => invalid
TRI_FreeJson(yyextra._memoryZone, array);
yyextra._message = "expecting attribute name";
return NULL;
}
ptr = yytext;
len = yyleng;
name = TRI_UnescapeUtf8StringZ(yyextra._memoryZone, ptr + 1, len - 2, &outLength);
if (name == NULL) {
TRI_FreeJson(yyextra._memoryZone, array);
yyextra._message = "out-of-memory";
@ -2189,7 +2216,7 @@ static TRI_json_t* ParseArray (yyscan_t scanner) {
return NULL;
}
// fallowed by an object
// followed by an object
c = tri_jsp_lex(scanner);
sub = ParseObject(scanner, c);
@ -2199,8 +2226,7 @@ static TRI_json_t* ParseArray (yyscan_t scanner) {
return NULL;
}
TRI_Insert3ArrayJson(yyextra._memoryZone, array, name, sub);
TRI_FreeString(yyextra._memoryZone, name);
TRI_Insert4ArrayJson(yyextra._memoryZone, array, name, nameLen, sub);
c = tri_jsp_lex(scanner);
}
@ -2220,10 +2246,6 @@ static TRI_json_t* ParseObject (yyscan_t scanner, int c) {
TRI_json_t* result;
switch (c) {
case END_OF_FILE:
yyextra._message = "expecting atom, got end-of-file";
return NULL;
case FALSE_CONSTANT:
result = TRI_CreateBooleanJson(yyextra._memoryZone, false);
@ -2252,19 +2274,15 @@ static TRI_json_t* ParseObject (yyscan_t scanner, int c) {
return result;
case NUMBER_CONSTANT: {
char buffer[512];
char* ep;
double d;
if ((size_t) yyleng >= sizeof(buffer)) {
if ((size_t) yyleng >= 512) {
yyextra._message = "number too big";
return NULL;
}
memcpy(buffer, yytext, yyleng);
buffer[yyleng] = '\0';
d = strtod(buffer, &ep);
d = strtod(yytext, &ep);
if (d == HUGE_VAL && errno == ERANGE) {
yyextra._message = "number too big";
@ -2276,7 +2294,8 @@ static TRI_json_t* ParseObject (yyscan_t scanner, int c) {
return NULL;
}
if (ep != buffer + yyleng) {
// if (ep != buffer + yyleng) {
if (ep != yytext + yyleng) {
yyextra._message = "cannot parse number";
return NULL;
}
@ -2309,17 +2328,27 @@ static TRI_json_t* ParseObject (yyscan_t scanner, int c) {
return result;
}
case STRING_CONSTANT_ASCII: {
result = TRI_CreateString2CopyJson(yyextra._memoryZone, yytext + 1, yyleng - 2);
if (result == NULL) {
yyextra._message = "out-of-memory";
}
return result;
}
case OPEN_BRACE:
return ParseArray(scanner);
case OPEN_BRACKET:
return ParseList(scanner);
case CLOSE_BRACE:
yyextra._message = "expected object, got '}'";
return NULL;
case OPEN_BRACKET:
return ParseList(scanner);
case CLOSE_BRACKET:
yyextra._message = "expected object, got ']'";
return NULL;
@ -2335,6 +2364,10 @@ static TRI_json_t* ParseObject (yyscan_t scanner, int c) {
case UNQUOTED_STRING:
yyextra._message = "expected object, got unquoted string";
return NULL;
case END_OF_FILE:
yyextra._message = "expecting atom, got end-of-file";
return NULL;
}
yyextra._message = "unknown atom";

View File

@ -67,6 +67,7 @@ PLUS [+]
#define COMMA 10
#define COLON 11
#define UNQUOTED_STRING 12
#define STRING_CONSTANT_ASCII 13
struct jsonData {
char const* _message;
@ -99,6 +100,14 @@ struct jsonData {
* strings
* ----------------------------------------------------------------------------- */
\"[ !\x23-\x5b\x5d-\x7f]*\" {
  // performance optimisation for all-ASCII strings without escape characters
  // this matches the ASCII chars with ordinal numbers 35 (x23) to 127 (x7f),
  // plus space (32) and ! (33) but no quotation marks (34, x22) and backslashes (92, x5c)
  // note: the upper bound must be written as \x7f; without the backslash the
  // class ends at the letter 'x' and strings containing y, z, {, |, } or ~
  // silently fall through to the slower STRING_CONSTANT rule
  return STRING_CONSTANT_ASCII;
}
\"(\\.|[^\\\"])*\" {
return STRING_CONSTANT;
}
@ -239,10 +248,8 @@ static TRI_json_t* ParseArray (yyscan_t scanner) {
TRI_json_t* sub;
bool comma;
char* name;
char const* ptr;
size_t nameLen;
int c;
size_t len;
size_t outLength;
comma = false;
array = TRI_CreateArrayJson(yyextra._memoryZone);
@ -272,16 +279,29 @@ static TRI_json_t* ParseArray (yyscan_t scanner) {
}
// attribute name
if (c != STRING_CONSTANT) {
if (c == STRING_CONSTANT) {
  // "complex" attribute name: the token may contain escape sequences
  // initialised so that nameLen is well-defined even if unescaping fails
  size_t outLength = 0;

  // do proper unescaping of the text between the enclosing quotes
  // (yytext + 1 and yyleng - 2 skip the opening and closing quotation mark)
  name = TRI_UnescapeUtf8StringZ(yyextra._memoryZone, yytext + 1, yyleng - 2, &outLength);

  // unescaping can shrink the string (e.g. \" becomes "), so the length that
  // is later handed to TRI_Insert4ArrayJson must be the unescaped length.
  // passing the raw token length would make the stored string length larger
  // than the buffer that name points to
  nameLen = outLength;
}
else if (c == STRING_CONSTANT_ASCII) {
// ASCII-only attribute name
nameLen = yyleng - 2;
// no unescaping necessary. just copy it
name = TRI_DuplicateString2Z(yyextra._memoryZone, yytext + 1, nameLen);
}
else {
// some other token found => invalid
TRI_FreeJson(yyextra._memoryZone, array);
yyextra._message = "expecting attribute name";
return NULL;
}
ptr = yytext;
len = yyleng;
name = TRI_UnescapeUtf8StringZ(yyextra._memoryZone, ptr + 1, len - 2, &outLength);
if (name == NULL) {
TRI_FreeJson(yyextra._memoryZone, array);
yyextra._message = "out-of-memory";
@ -298,7 +318,7 @@ static TRI_json_t* ParseArray (yyscan_t scanner) {
return NULL;
}
// fallowed by an object
// followed by an object
c = yylex(scanner);
sub = ParseObject(scanner, c);
@ -308,8 +328,7 @@ static TRI_json_t* ParseArray (yyscan_t scanner) {
return NULL;
}
TRI_Insert3ArrayJson(yyextra._memoryZone, array, name, sub);
TRI_FreeString(yyextra._memoryZone, name);
TRI_Insert4ArrayJson(yyextra._memoryZone, array, name, nameLen, sub);
c = yylex(scanner);
}
@ -329,10 +348,6 @@ static TRI_json_t* ParseObject (yyscan_t scanner, int c) {
TRI_json_t* result;
switch (c) {
case END_OF_FILE:
yyextra._message = "expecting atom, got end-of-file";
return NULL;
case FALSE_CONSTANT:
result = TRI_CreateBooleanJson(yyextra._memoryZone, false);
@ -361,19 +376,16 @@ static TRI_json_t* ParseObject (yyscan_t scanner, int c) {
return result;
case NUMBER_CONSTANT: {
char buffer[512];
char* ep;
double d;
if ((size_t) yyleng >= sizeof(buffer)) {
if ((size_t) yyleng >= 512) {
yyextra._message = "number too big";
return NULL;
}
memcpy(buffer, yytext, yyleng);
buffer[yyleng] = '\0';
d = strtod(buffer, &ep);
// yytext is null-terminated. can use it directly without copying it into a temporary buffer
d = strtod(yytext, &ep);
if (d == HUGE_VAL && errno == ERANGE) {
yyextra._message = "number too big";
@ -385,7 +397,7 @@ static TRI_json_t* ParseObject (yyscan_t scanner, int c) {
return NULL;
}
if (ep != buffer + yyleng) {
if (ep != yytext + yyleng) {
yyextra._message = "cannot parse number";
return NULL;
}
@ -418,17 +430,27 @@ static TRI_json_t* ParseObject (yyscan_t scanner, int c) {
return result;
}
case STRING_CONSTANT_ASCII: {
result = TRI_CreateString2CopyJson(yyextra._memoryZone, yytext + 1, yyleng - 2);
if (result == NULL) {
yyextra._message = "out-of-memory";
}
return result;
}
case OPEN_BRACE:
return ParseArray(scanner);
case OPEN_BRACKET:
return ParseList(scanner);
case CLOSE_BRACE:
yyextra._message = "expected object, got '}'";
return NULL;
case OPEN_BRACKET:
return ParseList(scanner);
case CLOSE_BRACKET:
yyextra._message = "expected object, got ']'";
return NULL;
@ -444,6 +466,10 @@ static TRI_json_t* ParseObject (yyscan_t scanner, int c) {
case UNQUOTED_STRING:
yyextra._message = "expected object, got unquoted string";
return NULL;
case END_OF_FILE:
yyextra._message = "expecting atom, got end-of-file";
return NULL;
}
yyextra._message = "unknown atom";