mirror of https://gitee.com/bigwinds/arangodb
641 lines
16 KiB
LLVM
641 lines
16 KiB
LLVM
%top{
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief json parser
|
|
///
|
|
/// @file
|
|
///
|
|
/// DISCLAIMER
|
|
///
|
|
/// Copyright 2004-2012 triagens GmbH, Cologne, Germany
|
|
///
|
|
/// Licensed under the Apache License, Version 2.0 (the "License");
|
|
/// you may not use this file except in compliance with the License.
|
|
/// You may obtain a copy of the License at
|
|
///
|
|
/// http://www.apache.org/licenses/LICENSE-2.0
|
|
///
|
|
/// Unless required by applicable law or agreed to in writing, software
|
|
/// distributed under the License is distributed on an "AS IS" BASIS,
|
|
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
/// See the License for the specific language governing permissions and
|
|
/// limitations under the License.
|
|
///
|
|
/// Copyright holder is triAGENS GmbH, Cologne, Germany
|
|
///
|
|
/// @author Dr. Frank Celler
|
|
/// @author Copyright 2011-2012, triAGENS GmbH, Cologne, Germany
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include "Basics/Common.h"
|
|
|
|
#include "Basics/json.h"
|
|
#include "Basics/tri-strings.h"
|
|
#include "Basics/logging.h"
|
|
|
|
#ifdef _WIN32
|
|
#define YY_NO_UNISTD_H 1
|
|
#else
|
|
#ifndef __FreeBSD__
|
|
int fileno(FILE *stream);
|
|
#endif
|
|
#endif
|
|
|
|
#define YY_NO_INPUT
|
|
}
|
|
|
|
/* scanner configuration: a reentrant 8-bit batch scanner without yywrap/unput
 * support; each scanner instance carries a struct jsonData as extra data and
 * all generated symbols are prefixed with tri_jsp_ */
%option noyywrap nounput batch 8bit reentrant
%option extra-type="struct jsonData"
%option prefix="tri_jsp_"

/* character classes used by the number rule */
ZERO          [0]
DIGIT         [0-9]
DIGIT1        [1-9]
MINUS         [-]
PLUS          [+]
|
|
|
|
%{
// token codes returned by the scanner rules and consumed by the parse functions

// end of input
#define END_OF_FILE            0

// literals
#define FALSE_CONSTANT         1
#define TRUE_CONSTANT          2
#define NULL_CONSTANT          3
#define NUMBER_CONSTANT        4
#define STRING_CONSTANT        5

// structural characters
#define OPEN_BRACE             6
#define CLOSE_BRACE            7
#define OPEN_BRACKET           8
#define CLOSE_BRACKET          9
#define COMMA                  10
#define COLON                  11

// any character that matches no other rule
#define UNQUOTED_STRING        12

// quoted string consisting only of ASCII characters without escape sequences
#define STRING_CONSTANT_ASCII  13

// shared, compiled-in value that empty JSON strings refer to
static char const* EmptyString = "";

// per-scanner state (see %option extra-type)
struct jsonData {
  TRI_memory_zone_t* _memoryZone;  // zone used for all allocations made while parsing
  char const* _message;            // last error message, set by the parse functions
};

// replacement for the scanner's fatal error handler: the message is only
// logged. the call to yy_fatal_error sits in a branch that is never taken;
// presumably it is there so that the generated function counts as used
#define YY_FATAL_ERROR(a)                  \
  do {                                     \
    LOG_DEBUG("json-parser: %s", (a));     \
    if (false) {                           \
      yy_fatal_error(a, nullptr);          \
    }                                      \
  }                                        \
  while (0)
%}
|
|
|
|
%%

 /* ---------------------------------------------------------------------------
  * keywords (matched case-insensitively)
  * --------------------------------------------------------------------------- */

(?i:false)  { return FALSE_CONSTANT; }

(?i:null)   { return NULL_CONSTANT; }

(?i:true)   { return TRUE_CONSTANT; }

 /* ---------------------------------------------------------------------------
  * strings
  * --------------------------------------------------------------------------- */

\"[ !\x23-\x5b\x5d-\x7f]*\" {
  // fast path for quoted strings that contain neither escape sequences nor
  // non-ASCII bytes: the character class covers space (32), ! (33) and the
  // range 35 (x23) to 127 (x7f) except the backslash (92, x5c); the quotation
  // mark (34, x22) is excluded as well
  return STRING_CONSTANT_ASCII;
}

\"(\\.|[^\\\"])*\" {
  // any other quoted string; it may contain backslash escapes
  return STRING_CONSTANT;
}

 /* ---------------------------------------------------------------------------
  * numbers: optional sign, integer part, optional fraction, optional exponent
  * --------------------------------------------------------------------------- */

({MINUS}|{PLUS})?({ZERO}|({DIGIT1}{DIGIT}*))((\.{DIGIT}+)?([eE]({MINUS}|{PLUS})?{DIGIT}+)?)? {
  return NUMBER_CONSTANT;
}

 /* ---------------------------------------------------------------------------
  * structural characters
  * --------------------------------------------------------------------------- */

"{"  { return OPEN_BRACE; }

"}"  { return CLOSE_BRACE; }

"["  { return OPEN_BRACKET; }

"]"  { return CLOSE_BRACKET; }

","  { return COMMA; }

":"  { return COLON; }

 /* ---------------------------------------------------------------------------
  * Whitespace is skipped. Any character left over at this point belongs to an
  * unquoted string, which the parse functions report as an error.
  * --------------------------------------------------------------------------- */

[ \t\r\n]* {
}

.    { return UNQUOTED_STRING; }

%%
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- forward declarations
|
|
// -----------------------------------------------------------------------------
|
|
|
|
static bool ParseObject (yyscan_t, TRI_json_t*);
|
|
static bool ParseValue (yyscan_t, TRI_json_t*, int);
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- private functions
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief do not use, only here to silence compiler
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
void TRI_JsonError (const char* msg) {
|
|
YY_FATAL_ERROR(msg);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief parses an array
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
static bool ParseArray (yyscan_t scanner, TRI_json_t* result) {
|
|
struct yyguts_t * yyg = (struct yyguts_t*) scanner;
|
|
|
|
TRI_InitArrayJson(yyextra._memoryZone, result);
|
|
|
|
int c = yylex(scanner);
|
|
bool comma = false;
|
|
|
|
while (c != END_OF_FILE) {
|
|
if (c == CLOSE_BRACKET) {
|
|
return true;
|
|
}
|
|
|
|
if (comma) {
|
|
if (c != COMMA) {
|
|
yyextra._message = "expecting comma";
|
|
return false;
|
|
}
|
|
|
|
c = yylex(scanner);
|
|
}
|
|
else {
|
|
comma = true;
|
|
}
|
|
|
|
{
|
|
// optimization: get the address of the next element in the array
|
|
// so we can create the upcoming element in place
|
|
TRI_json_t* next = static_cast<TRI_json_t*>(TRI_NextVector(&result->_value._objects));
|
|
|
|
if (next == nullptr) {
|
|
yyextra._message = "out-of-memory";
|
|
return false;
|
|
}
|
|
|
|
// be paranoid and initialize the memory
|
|
TRI_InitNullJson(next);
|
|
|
|
if (! ParseValue(scanner, next, c)) {
|
|
// be paranoid
|
|
|
|
return false;
|
|
}
|
|
}
|
|
|
|
c = yylex(scanner);
|
|
}
|
|
|
|
yyextra._message = "expecting a list element, got end-of-file";
|
|
|
|
return false;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief parse an object
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
static bool ParseObject (yyscan_t scanner, TRI_json_t* result) {
|
|
struct yyguts_t * yyg = (struct yyguts_t*) scanner;
|
|
|
|
bool comma = false;
|
|
TRI_InitObjectJson(yyextra._memoryZone, result);
|
|
|
|
int c = yylex(scanner);
|
|
|
|
while (c != END_OF_FILE) {
|
|
if (c == CLOSE_BRACE) {
|
|
return true;
|
|
}
|
|
|
|
if (comma) {
|
|
if (c != COMMA) {
|
|
yyextra._message = "expecting comma";
|
|
return false;
|
|
}
|
|
|
|
c = yylex(scanner);
|
|
}
|
|
else {
|
|
comma = true;
|
|
}
|
|
|
|
char* name;
|
|
size_t nameLen;
|
|
|
|
// attribute name
|
|
if (c == STRING_CONSTANT) {
|
|
// utf-8 attribute name
|
|
size_t outLength;
|
|
nameLen = yyleng - 2;
|
|
|
|
// do proper unescaping
|
|
name = TRI_UnescapeUtf8String(yyextra._memoryZone, yytext + 1, nameLen, &outLength);
|
|
nameLen = outLength;
|
|
}
|
|
else if (c == STRING_CONSTANT_ASCII) {
|
|
// ASCII-only attribute name
|
|
nameLen = yyleng - 2;
|
|
|
|
// no unescaping necessary. just copy it
|
|
name = TRI_DuplicateString2Z(yyextra._memoryZone, yytext + 1, nameLen);
|
|
}
|
|
else {
|
|
// some other token found => invalid
|
|
yyextra._message = "expecting attribute name";
|
|
return false;
|
|
}
|
|
|
|
if (name == nullptr) {
|
|
yyextra._message = "out-of-memory";
|
|
return false;
|
|
}
|
|
|
|
// followed by a colon
|
|
c = yylex(scanner);
|
|
|
|
if (c != COLON) {
|
|
TRI_FreeString(yyextra._memoryZone, name);
|
|
yyextra._message = "expecting colon";
|
|
return false;
|
|
}
|
|
|
|
// followed by an object
|
|
c = yylex(scanner);
|
|
|
|
{
|
|
// optimization: we allocate room for two elements at once
|
|
int res = TRI_ReserveVector(&result->_value._objects, 2);
|
|
|
|
if (res != TRI_ERROR_NO_ERROR) {
|
|
yyextra._message = "out-of-memory";
|
|
return false;
|
|
}
|
|
|
|
// get the address of the next element so we can create the attribute name in place
|
|
TRI_json_t* next = static_cast<TRI_json_t*>(TRI_NextVector(&result->_value._objects));
|
|
// we made sure with the reserve call that we haven't run out of memory
|
|
TRI_ASSERT_EXPENSIVE(next != nullptr);
|
|
|
|
// store attribute name
|
|
TRI_InitStringJson(next, name, nameLen);
|
|
|
|
// now process the value
|
|
next = static_cast<TRI_json_t*>(TRI_NextVector(&result->_value._objects));
|
|
// we made sure with the reserve call that we haven't run out of memory
|
|
TRI_ASSERT_EXPENSIVE(next != nullptr);
|
|
|
|
// be paranoid and initialize the memory
|
|
TRI_InitNullJson(next);
|
|
|
|
if (! ParseValue(scanner, next, c)) {
|
|
// be paranoid
|
|
return false;
|
|
}
|
|
}
|
|
|
|
c = yylex(scanner);
|
|
}
|
|
|
|
yyextra._message = "expecting a object attribute name or element, got end-of-file";
|
|
|
|
return false;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief parse an object
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
static bool ParseValue (yyscan_t scanner, TRI_json_t* result, int c) {
|
|
struct yyguts_t * yyg = (struct yyguts_t*) scanner;
|
|
|
|
switch (c) {
|
|
case FALSE_CONSTANT:
|
|
TRI_InitBooleanJson(result, false);
|
|
|
|
return true;
|
|
|
|
case TRUE_CONSTANT:
|
|
TRI_InitBooleanJson(result, true);
|
|
|
|
return true;
|
|
|
|
case NULL_CONSTANT:
|
|
TRI_InitNullJson(result);
|
|
|
|
return true;
|
|
|
|
case NUMBER_CONSTANT: {
|
|
char* ep;
|
|
double d;
|
|
|
|
if ((size_t) yyleng >= 512) {
|
|
yyextra._message = "number too big";
|
|
return false;
|
|
}
|
|
|
|
// need to reset errno because return value of 0 is not distinguishable from an error on Linux
|
|
errno = 0;
|
|
|
|
// yytext is null-terminated. can use it directly without copying it into a temporary buffer
|
|
d = strtod(yytext, &ep);
|
|
|
|
if (d == HUGE_VAL && errno == ERANGE) {
|
|
yyextra._message = "number too big";
|
|
return false;
|
|
}
|
|
|
|
if (d == 0 && errno == ERANGE) {
|
|
yyextra._message = "number too small";
|
|
return false;
|
|
}
|
|
|
|
if (ep != yytext + yyleng) {
|
|
yyextra._message = "cannot parse number";
|
|
return false;
|
|
}
|
|
|
|
TRI_InitNumberJson(result, d);
|
|
|
|
return true;
|
|
}
|
|
|
|
case STRING_CONSTANT: {
|
|
if (yyleng <= 2) {
|
|
// string is empty
|
|
char const* ptr = EmptyString; // we'll create a reference to this compiled-in string
|
|
TRI_InitStringReferenceJson(result, ptr, 0);
|
|
}
|
|
else {
|
|
// string is not empty, process it
|
|
size_t outLength;
|
|
char* ptr = TRI_UnescapeUtf8String(yyextra._memoryZone, yytext + 1, yyleng - 2, &outLength);
|
|
if (ptr == nullptr) {
|
|
yyextra._message = "out-of-memory";
|
|
return false;
|
|
}
|
|
|
|
TRI_InitStringJson(result, ptr, outLength);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
case STRING_CONSTANT_ASCII: {
|
|
if (yyleng <= 2) {
|
|
// string is empty
|
|
char const* ptr = EmptyString; // we'll create a reference to this compiled-in string
|
|
TRI_InitStringReferenceJson(result, ptr, 0);
|
|
}
|
|
else {
|
|
char* ptr = TRI_DuplicateString2Z(yyextra._memoryZone, yytext + 1, yyleng - 2);
|
|
|
|
if (ptr == nullptr) {
|
|
yyextra._message = "out-of-memory";
|
|
return false;
|
|
}
|
|
|
|
TRI_InitStringJson(result, ptr, yyleng - 2);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
case OPEN_BRACE:
|
|
return ParseObject(scanner, result);
|
|
|
|
case OPEN_BRACKET:
|
|
return ParseArray(scanner, result);
|
|
|
|
case CLOSE_BRACE:
|
|
yyextra._message = "expected object, got '}'";
|
|
return false;
|
|
|
|
case CLOSE_BRACKET:
|
|
yyextra._message = "expected object, got ']'";
|
|
return false;
|
|
|
|
case COMMA:
|
|
yyextra._message = "expected object, got ','";
|
|
return false;
|
|
|
|
case COLON:
|
|
yyextra._message = "expected object, got ':'";
|
|
return false;
|
|
|
|
case UNQUOTED_STRING:
|
|
yyextra._message = "expected object, got unquoted string";
|
|
return false;
|
|
|
|
case END_OF_FILE:
|
|
yyextra._message = "expecting atom, got end-of-file";
|
|
return false;
|
|
}
|
|
|
|
yyextra._message = "unknown atom";
|
|
return false;
|
|
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- public functions
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief parses a json string
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
TRI_json_t* TRI_Json2String (TRI_memory_zone_t* zone, char const* text, char** error) {
|
|
TRI_json_t* object;
|
|
YY_BUFFER_STATE buf;
|
|
int c;
|
|
struct yyguts_t * yyg;
|
|
yyscan_t scanner;
|
|
|
|
object = static_cast<TRI_json_t*>
|
|
(TRI_Allocate(zone, sizeof(TRI_json_t), false));
|
|
|
|
if (object == nullptr) {
|
|
// out of memory
|
|
return nullptr;
|
|
}
|
|
|
|
// init as a JSON null object so the memory in object is initialized
|
|
TRI_InitNullJson(object);
|
|
|
|
yylex_init(&scanner);
|
|
yyg = (struct yyguts_t*) scanner;
|
|
|
|
yyextra._memoryZone = zone;
|
|
|
|
buf = yy_scan_string((char yyconst*) text, scanner);
|
|
|
|
c = yylex(scanner);
|
|
if (! ParseValue(scanner, object, c)) {
|
|
TRI_FreeJson(zone, object);
|
|
object = nullptr;
|
|
LOG_DEBUG("failed to parse json object: '%s'", yyextra._message);
|
|
}
|
|
else {
|
|
c = yylex(scanner);
|
|
|
|
if (c != END_OF_FILE) {
|
|
TRI_FreeJson(zone, object);
|
|
object = nullptr;
|
|
yyextra._message = "failed to parse json object: expecting EOF";
|
|
|
|
LOG_DEBUG("failed to parse json object: expecting EOF");
|
|
}
|
|
}
|
|
|
|
if (error != nullptr) {
|
|
if (yyextra._message != nullptr) {
|
|
*error = TRI_DuplicateString(yyextra._message);
|
|
}
|
|
else {
|
|
*error = nullptr;
|
|
}
|
|
}
|
|
|
|
yy_delete_buffer(buf, scanner);
|
|
yylex_destroy(scanner);
|
|
|
|
return object;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief parses a json string
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
TRI_json_t* TRI_JsonString (TRI_memory_zone_t* zone, char const* text) {
|
|
return TRI_Json2String(zone, text, nullptr);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief parses a json file
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
TRI_json_t* TRI_JsonFile (TRI_memory_zone_t* zone, char const* path, char** error) {
|
|
FILE* in;
|
|
TRI_json_t* value;
|
|
int c;
|
|
struct yyguts_t * yyg;
|
|
yyscan_t scanner;
|
|
|
|
value = static_cast<TRI_json_t*>(TRI_Allocate(zone, sizeof(TRI_json_t), false));
|
|
|
|
if (value == nullptr) {
|
|
// out of memory
|
|
return nullptr;
|
|
}
|
|
|
|
in = fopen(path, "rb");
|
|
|
|
if (in == nullptr) {
|
|
LOG_ERROR("cannot open file '%s': '%s'", path, TRI_LAST_ERROR_STR);
|
|
TRI_Free(zone, value);
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
// init as a JSON null object so the memory in value is initialized
|
|
TRI_InitNullJson(value);
|
|
|
|
yylex_init(&scanner);
|
|
yyg = (struct yyguts_t*) scanner;
|
|
|
|
yyextra._memoryZone = zone;
|
|
yyin = in;
|
|
|
|
c = yylex(scanner);
|
|
if (! ParseValue(scanner, value, c)) {
|
|
TRI_FreeJson(zone, value);
|
|
value = nullptr;
|
|
LOG_DEBUG("failed to parse json value: '%s'", yyextra._message);
|
|
}
|
|
else {
|
|
c = yylex(scanner);
|
|
|
|
if (c != END_OF_FILE) {
|
|
TRI_FreeJson(zone, value);
|
|
value = nullptr;
|
|
LOG_DEBUG("failed to parse json value: expecting EOF");
|
|
}
|
|
}
|
|
|
|
if (error != nullptr) {
|
|
if (yyextra._message != nullptr) {
|
|
*error = TRI_DuplicateString(yyextra._message);
|
|
}
|
|
else {
|
|
*error = nullptr;
|
|
}
|
|
}
|
|
|
|
yylex_destroy(scanner);
|
|
|
|
fclose(in);
|
|
|
|
return value;
|
|
}
|
|
|
|
// Local Variables:
|
|
// mode: C
|
|
// mode: outline-minor
|
|
// outline-regexp: "^\\(/// @brief\\|/// {@inheritDoc}\\|/// @addtogroup\\|// --SECTION--\\|/// @\\}\\)"
|
|
// End:
|