diff --git a/arangod/SkipListsEx/skiplistEx.c b/arangod/SkipListsEx/skiplistEx.c index 27a78a12da..96825370bc 100644 --- a/arangod/SkipListsEx/skiplistEx.c +++ b/arangod/SkipListsEx/skiplistEx.c @@ -36,8 +36,6 @@ #include #endif -#define SKIPLIST_EX_ABSOLUTE_MAX_HEIGHT 100 -#define SKIPLIST_EX_CAS_FAILURES_MAX_LOOP 10 // ----------------------------------------------------------------------------- // --SECTION-- SKIPLIST_EX @@ -47,23 +45,21 @@ // ----------------------------------------------------------------------------- // --SECTION-- Private Type Structures // ----------------------------------------------------------------------------- -typedef enum { - // the nearest neighbour node is normal - TRI_SKIPLIST_EX_NORMAL_NEAREST_NEIGHBOUR_FLAG, - // the nearest neighbour node is bricked - TRI_SKIPLIST_EX_BRICKED_NEAREST_NEIGHBOUR_FLAG, +typedef enum { + TRI_SKIPLIST_EX_NORMAL_NEAREST_NEIGHBOUR_FLAG, // the nearest neighbour node is normal + TRI_SKIPLIST_EX_BRICKED_NEAREST_NEIGHBOUR_FLAG, // the nearest neighbour node is bricked - next/prev pointers can not be modified - TRI_SKIPLIST_EX_NORMAL_TOWER_NODE_FLAG, // normal tower node, no destruction or insertion pending - TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG, // glass tower node, see through skip the node and go directly to - // its successor and/or predecessor - TRI_SKIPLIST_EX_FREE_TO_GROW_START_END_NODES_FLAG, - TRI_SKIPLIST_EX_NOT_FREE_TO_GROW_START_END_NODES_FLAG, - TRI_SKIPLIST_EX_DESTROY_NODE_PENDING_FLAG, // the node is in the process of being destroyed - TRI_SKIPLIST_EX_INSERT_NODE_PENDING_FLAG, // the node is in the process of begin inserted - TRI_SKIPLIST_EX_UNDEFINED_TOWER_NODE_FLAG // danger danger Will Robinson + TRI_SKIPLIST_EX_NORMAL_TOWER_NODE_FLAG, // normal tower node, no removal pending + TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG, // glass tower node, skipped in a lookup, removal pending + + TRI_SKIPLIST_EX_FREE_TO_GROW_START_END_NODES_FLAG, // start/end nodes special this and flag below ensures that 
+ TRI_SKIPLIST_EX_NOT_FREE_TO_GROW_START_END_NODES_FLAG // the tower height of these nodes is performed sequentially } TRI_skiplistEx_tower_node_flag_e; +static unsigned int CAS_FAILURE_SLEEP_TIME = 1000; +static unsigned int SKIPLIST_EX_ABSOLUTE_MAX_HEIGHT = 100; +static unsigned int SKIPLIST_EX_CAS_FAILURES_MAX_LOOP = 10; // ----------------------------------------------------------------------------- // --SECTION-- STATIC FORWARD DECLARATIONS @@ -80,10 +76,9 @@ static void* NextNodeBaseSkipListEx (TRI_skiplistEx_base_t*, void*, uint64_t); static void* PrevNodeBaseSkipListEx (TRI_skiplistEx_base_t*, void*, uint64_t); static int32_t RandLevel (TRI_skiplistEx_base_t*); - static void JoinStartEndNodes (TRI_skiplistEx_node_t*, TRI_skiplistEx_node_t*, uint32_t, uint32_t); -static int JoinNewNodeCas (TRI_skiplistEx_node_t* newNode); -static int UnJoinOldNodeCas (TRI_skiplistEx_node_t* oldNode); +static int JoinNewNodeCas (TRI_skiplistEx_node_t* newNode); // when node is inserted +static int UnJoinOldNodeCas (TRI_skiplistEx_node_t* oldNode); // when node is removed // ----------------------------------------------------------------------------- // --SECTION-- unique skiplist constructors and destructors @@ -98,6 +93,12 @@ static int UnJoinOldNodeCas (TRI_skiplistEx_node_t* oldNode); /// @brief initialises an skip list //////////////////////////////////////////////////////////////////////////////// +// ............................................................................. +// TODO: The static integer variables CAS_FAILURE_SLEEP_TIME(1000), +// SKIPLIST_EX_ABSOLUTE_MAX_HEIGHT(100) and SKIPLIST_EX_CAS_FAILURES_MAX_LOOP(10) +// should be adjusted upon startup of the server -- command line perhaps? +// ............................................................................. 
+ int TRI_InitSkipListEx (TRI_skiplistEx_t* skiplist, size_t elementSize, int (*compareElementElement) (TRI_skiplistEx_t*, void*, void*, int), int (*compareKeyElement) (TRI_skiplistEx_t*, void*, void*, int), @@ -220,7 +221,7 @@ int TRI_InitSkipListEx (TRI_skiplistEx_t* skiplist, size_t elementSize, // .......................................................................... result = GrowNewNodeHeight(&(skiplist->_base._startNode), skiplist->_base._maxHeight, 2,TRI_ERROR_NO_ERROR); // may fail result = GrowNewNodeHeight(&(skiplist->_base._endNode), skiplist->_base._maxHeight, 2, result); // may fail - + if (result != TRI_ERROR_NO_ERROR) { TRI_Free(TRI_UNKNOWN_MEM_ZONE, (void*)(skiplist->_base._random)); @@ -235,6 +236,7 @@ int TRI_InitSkipListEx (TRI_skiplistEx_t* skiplist, size_t elementSize, return result; } + // .......................................................................... // Join the empty lists together // no locking requirements for joining nodes since the skip list index is not known @@ -433,11 +435,12 @@ int TRI_InsertKeySkipListEx (TRI_skiplistEx_t* skiplist, // the skiplist we are // ........................................................................... - // Provide a simple non-blocking, block? + // Provide a simple non-blocking, lock? The sleep time should be something + // needs to be adjusted. // ........................................................................... if (casFailures > -1) { - usleep(1000); + usleep(CAS_FAILURE_SLEEP_TIME); } @@ -489,13 +492,14 @@ int TRI_InsertKeySkipListEx (TRI_skiplistEx_t* skiplist, // the skiplist we are // ......................................................................... - // Is our next node a glass node? If so we must skip it! - // Note: since Garbage Collection is performed in TWO passes, it is possible - // that we have more than one glass node. + // An insert/lookup/removal SEARCH like this, can ONLY ever find 1 glass + // node when we are very unlucky. 
(The GC makes the node glass and then + // goes and unlinks the pointers.) If we skip the glass node, then we + // will have the wrong pointers to compare, so we have to CAS_RESTART // ......................................................................... - if (TRI_CompareIntegerUInt32(&(nextNode->_towerFlag),TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG)) { - goto START; + if (nextNode->_towerFlag == TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG) { + goto CAS_RESTART; } @@ -561,8 +565,8 @@ int TRI_InsertKeySkipListEx (TRI_skiplistEx_t* skiplist, // the skiplist we are // ....................................................................... - // The element is greater than the next node element. Keep going on this - // level. + // The element to be inserted has a key which greater than the next node's + // element key. Keep going on this level. // ....................................................................... if (compareResult > 0) { @@ -680,7 +684,7 @@ int TRI_InsertKeySkipListEx (TRI_skiplistEx_t* skiplist, // the skiplist we are // Pass 3: Modify the newnode.prev.next to newnode and newnode.next.prev = newnode // .......................................................................... 
- result = JoinNewNodeCas (newNode); + result = JoinNewNodeCas(newNode); if (result == TRI_WARNING_ARANGO_INDEX_SKIPLIST_INSERT_CAS_FAILURE) { goto CAS_RESTART; } @@ -698,27 +702,6 @@ int TRI_InsertKeySkipListEx (TRI_skiplistEx_t* skiplist, // the skiplist we are //////////////////////////////////////////////////////////////////////////////// void* TRI_LeftLookupByKeySkipListEx(TRI_skiplistEx_t* skiplist, void* key, uint64_t thisTransID) { - return NULL; -} - - - -//////////////////////////////////////////////////////////////////////////////// -/// @brief locate a node using an element -//////////////////////////////////////////////////////////////////////////////// - -void* TRI_LookupByElementSkipListEx (TRI_skiplistEx_t* skiplist, void* element, uint64_t thisTransID) { - assert(false); // there is no way we can be here - return NULL; -} - - - -//////////////////////////////////////////////////////////////////////////////// -/// @brief returns node which matches a key -//////////////////////////////////////////////////////////////////////////////// - -void* TRI_LookupByKeySkipListEx (TRI_skiplistEx_t* skiplist, void* key, uint64_t thisTransID) { int32_t currentLevel; TRI_skiplistEx_node_t* currentNode; TRI_skiplistEx_node_t* nextNode; @@ -756,7 +739,7 @@ void* TRI_LookupByKeySkipListEx (TRI_skiplistEx_t* skiplist, void* key, uint64_t if (casFailures > -1) { assert(0); // a test to see why it blocks - should not block! - usleep(1000); + usleep(CAS_FAILURE_SLEEP_TIME); } @@ -810,7 +793,217 @@ void* TRI_LookupByKeySkipListEx (TRI_skiplistEx_t* skiplist, void* key, uint64_t // that we have more than one glass node. // ......................................................................... - if (TRI_CompareIntegerUInt32(&(nextNode->_towerFlag),TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG)) { + if (nextNode->_towerFlag == TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG) { + goto START; + } + + + // ......................................................................... 
+ // WE HAVE FOUR CASES TO CONSIDER + // ......................................................................... + + // ......................................................................... + // CASE ONE: + // At this level we have the smallest (start) and largest (end) nodes ONLY. + // CASE TWO: + // We have arrived at the end of the nodes and we are not at the + // start of the nodes either. + // ......................................................................... + + if (nextNode == &(skiplist->_base._endNode)) { + + + // ....................................................................... + // We are at the lowest level of the lists, and we haven't found the item + // yet. The currentNode does NOT compare and the next node is +\infinty. + // ....................................................................... + + if (currentLevel == 0) { + return currentNode; + } + + + // ....................................................................... + // We have not yet reached the lowest level continue down. Possibly our + // item we seek is to be found a lower level. + // ....................................................................... + + nextNode = currentNode; + --currentLevel; + goto START; + } + + + + // ......................................................................... + // CASE THREE: + // We are the smallest left most node and the NEXT node is NOT the end node. + // Compare this element with the element in the right node to see what we do. + // CASE FOUR: + // We are somewhere in the middle of a list, away from the smallest and + // largest nodes. + // ......................................................................... + + else { // nextNode != &(skiplist->_endNode + int compareResult; + + // ....................................................................... + // Use the callback to determine if the element is less or greater than + // the next node element. 
We treat the comparison by assuming we are + // looking for a "key - epsilon". With this assumption we always find the + // last key to our right if it exists. The reason this is necessary is as + // follows: we allow a multiple documents with the same key to be stored + // here with the proviso that all but the last one is marked as deleted. + // This is how we cater for multiple revisions. + // ....................................................................... + + compareResult = IndexStaticCompareKeyElement(skiplist,key,&(nextNode->_element), -1); + + // ....................................................................... + // -1 is returned if the number of fields (attributes) in the key is LESS + // than the number of fields in the index definition. This has the effect + // of being slightly less efficient since we have to proceed to the level + // 0 list in the set of skip lists. + // ....................................................................... + + // ....................................................................... + // We have found the item! + // ....................................................................... + + if (compareResult == 0) { + assert(false); + } + + if (compareResult > 0) { + currentNode = nextNode; + goto START; + } + + + // ....................................................................... + // The element is less than the next node. Can we drop down the list? + // If have reached the lowest level of the lists -- no such item. + // ....................................................................... + + if (currentLevel == 0) { + return currentNode; + } + + + // ....................................................................... + // Drop down the list + // ....................................................................... 
+ + --currentLevel; + nextNode = currentNode; + goto START; + } + + } // end of label START + + } // end of label CAS_RESTART + + assert(false); + return NULL; +} + + +//////////////////////////////////////////////////////////////////////////////// +/// @brief returns node which matches a key +//////////////////////////////////////////////////////////////////////////////// + +void* TRI_LookupByKeySkipListEx (TRI_skiplistEx_t* skiplist, void* key, uint64_t thisTransID) { + int32_t currentLevel; + TRI_skiplistEx_node_t* currentNode; + TRI_skiplistEx_node_t* nextNode; + int casFailures = -1; + + // ........................................................................... + // Just in case ... + // ........................................................................... + + if (skiplist == NULL) { + LOG_ERROR("Internal Error"); + return NULL; + } + + + // ........................................................................... + // Big loop to restart the whole search routine + // ........................................................................... + + CAS_RESTART: { + + // ........................................................................... + // To stop this loop CAS_RESTART becomming an infinite loop, use this check + // ........................................................................... + + if (casFailures == SKIPLIST_EX_CAS_FAILURES_MAX_LOOP) { + LOG_ERROR("CAS Failure"); + return NULL; + } + + + // ........................................................................... + // Provide a simple non-blocking, block? + // ........................................................................... + + if (casFailures > -1) { + usleep(CAS_FAILURE_SLEEP_TIME); + } + + + // ........................................................................... + // Increment the cas failures (which should always be hopefully 0). + // ........................................................................... 
+ + ++casFailures; + + + // ........................................................................... + // Determine the starting level and the starting node + // ........................................................................... + + currentLevel = skiplist->_base._startNode._colLength - 1; + currentNode = &(skiplist->_base._startNode); + nextNode = currentNode; + + + START: { + + + // ......................................................................... + // Find the next node in the current level of the lists. Protect yourself + // in case something has gone wrong. + // ......................................................................... + + if (nextNode == NULL) { + LOG_ERROR("CAS Failure"); + assert(0); + goto CAS_RESTART; + } + + + // ......................................................................... + // We require the successor of the current node so we can perform a + // comparison. It should never be null. + // ......................................................................... + + nextNode = (TRI_skiplistEx_node_t*)(nextNode->_column[currentLevel]._next); + if (nextNode == NULL) { + LOG_ERROR("CAS Failure"); + assert(0); + goto CAS_RESTART; + } + + + // ......................................................................... + // Is our next node a glass node? If so we must skip it! + // Note: since Garbage Collection is performed in TWO passes, it is possible + // that we have more than one glass node. + // ......................................................................... + + if (nextNode->_towerFlag == TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG) { goto START; } @@ -900,9 +1093,10 @@ void* TRI_LookupByKeySkipListEx (TRI_skiplistEx_t* skiplist, void* key, uint64_t // ................................................................... // This node has been inserted AFTER the reading starting reading! + // Treat this as if the node was NEVER there. // ................................................................... 
- return NULL; + //return NULL; } @@ -915,7 +1109,8 @@ void* TRI_LookupByKeySkipListEx (TRI_skiplistEx_t* skiplist, void* key, uint64_t // ................................................................... // Node has NOT been deleted (e.g. imagine it will be deleted some - // time in the future). This is the node we want. + // time in the future). This is the node we want, even though it may + // be deleted very very soon. // ................................................................... return nextNode; @@ -991,7 +1186,8 @@ void* TRI_PrevNodeSkipListEx(TRI_skiplistEx_t* skiplist, void* currentNode, uint //////////////////////////////////////////////////////////////////////////////// int TRI_RemoveElementSkipListEx (TRI_skiplistEx_t* skiplist, void* element, void* old, - const int passLevel, const uint64_t thisTransID) { + const int passLevel, const uint64_t thisTransID, + TRI_skiplistEx_node_t** passNode) { // ........................................................................... // To remove an element from this skip list we have three pass levels: // Pass 1: locate (if possible) the exact NODE - must match exactly. @@ -1045,8 +1241,7 @@ int TRI_RemoveElementSkipListEx (TRI_skiplistEx_t* skiplist, void* element, void // ........................................................................... if (casFailures > -1) { - assert(0); // a test to see why it blocks - should not block! - usleep(1000); + usleep(CAS_FAILURE_SLEEP_TIME); } @@ -1094,15 +1289,15 @@ int TRI_RemoveElementSkipListEx (TRI_skiplistEx_t* skiplist, void* element, void // ......................................................................... - // Is our next node a glass node? If so we must skip it! - // Note: since Garbage Collection is performed in TWO passes, it is possible - // that we have more than one glass node. + // Is our next node a glass node? If so we must skip it. 
Recall we are in + // Phase I here -- meaning that we are searching for a node which has not + // be removed and previously inserted. // ......................................................................... - - if (TRI_CompareIntegerUInt32(&(nextNode->_towerFlag),TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG)) { + + if (nextNode->_towerFlag == TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG) { goto START; } - + // ......................................................................... // WE HAVE FOUR CASES TO CONSIDER @@ -1179,7 +1374,7 @@ int TRI_RemoveElementSkipListEx (TRI_skiplistEx_t* skiplist, void* element, void // key but of course now must be marked as deleted. // ................................................................... - return TRI_WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_MISSING; + return TRI_WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_POST_INSERTED; } @@ -1199,7 +1394,7 @@ int TRI_RemoveElementSkipListEx (TRI_skiplistEx_t* skiplist, void* element, void goto END; } - // ..................................................................... + // ..................................................................... // The only case left here is that the node has been deleted by either // this transaction (which could happen in an UPDATE) or by some // previous write transaction. Treat this case as if the element is @@ -1260,6 +1455,9 @@ int TRI_RemoveElementSkipListEx (TRI_skiplistEx_t* skiplist, void* element, void if (old != NULL) { IndexStaticCopyElementElement(&(skiplist->_base), old, &(currentNode->_element)); } + + *passNode = currentNode; + return TRI_ERROR_NO_ERROR; } @@ -1269,29 +1467,32 @@ int TRI_RemoveElementSkipListEx (TRI_skiplistEx_t* skiplist, void* element, void // other connected nodes. // ......................................................................... case 2: { - bool ok; + // ....................................................................... 
// We can not now rely upon looking up the node using the key, since // we would need to traverse right and attempt to match either then // transaction id and/or the pointer to the doc. Easier to simply // send the address of the node back. // ....................................................................... - if (element == NULL) { + if (*passNode == NULL) { return TRI_ERROR_INTERNAL; } - currentNode = (TRI_skiplistEx_node_t*)(element); + currentNode = (TRI_skiplistEx_node_t*)(*passNode); + + // ....................................................................... // Only the Garbage Collector can transform a node into a glass node, and // since the GC is only operating in one thread safe to do a simple // comparison here. // ....................................................................... - if (currentNode->_towerFlag != TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG) { + if (currentNode->_towerFlag == TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG) { return TRI_ERROR_INTERNAL; } + // ....................................................................... // safety check - ok = TRI_CompareIntegerUInt64 (&(currentNode->_delTransID), thisTransID); - if (!ok) { + // ....................................................................... + if (currentNode->_delTransID != thisTransID) { return TRI_ERROR_INTERNAL; } @@ -1320,18 +1521,17 @@ int TRI_RemoveElementSkipListEx (TRI_skiplistEx_t* skiplist, void* element, void // we simply destroy it. we require the node to be glass. // ......................................................................... case 3: { - - bool ok; // ....................................................................... // We can not now rely upon looking up the node using the key, since // we would need to traverse right and attempt to match either then // transaction id and/or the pointer to the doc. Easier to simply // send the address of the node back. // ....................................................................... 
- if (element == NULL) { + if (*passNode == NULL) { return TRI_ERROR_INTERNAL; } - currentNode = (TRI_skiplistEx_node_t*)(element); + currentNode = (TRI_skiplistEx_node_t*)(*passNode); + // ....................................................................... // Only the Garbage Collector can transform a node into a glass node, and // since the GC is only operating in one thread safe to do a simple @@ -1341,9 +1541,10 @@ int TRI_RemoveElementSkipListEx (TRI_skiplistEx_t* skiplist, void* element, void return TRI_ERROR_INTERNAL; } + // ....................................................................... // safety check - ok = TRI_CompareIntegerUInt64 (&(currentNode->_delTransID), thisTransID); - if (!ok) { + // ....................................................................... + if (currentNode->_delTransID != thisTransID) { return TRI_ERROR_INTERNAL; } @@ -1373,7 +1574,8 @@ int TRI_RemoveElementSkipListEx (TRI_skiplistEx_t* skiplist, void* element, void //////////////////////////////////////////////////////////////////////////////// int TRI_RemoveKeySkipListEx(TRI_skiplistEx_t* skiplist, void* key, void* old, - const int passLevel, const uint64_t thisTransID) { + const int passLevel, const uint64_t thisTransID, + TRI_skiplistEx_node_t** passNode) { // Use the TRI_RemoveElementSkipList method instead. assert(false); return 0; @@ -1389,130 +1591,207 @@ void* TRI_RightLookupByKeySkipListEx(TRI_skiplistEx_t* skiplist, void* key, uint int32_t currentLevel; TRI_skiplistEx_node_t* currentNode; TRI_skiplistEx_node_t* prevNode; + int casFailures = -1; // ........................................................................... - // Just in case + // Just in case ... // ........................................................................... if (skiplist == NULL) { + LOG_ERROR("Internal Error"); return NULL; } - + // ........................................................................... 
- // Determine the starting level and the starting node + // Big loop to restart the whole search routine // ........................................................................... - currentLevel = skiplist->_base._startNode._colLength - 1; - currentNode = &(skiplist->_base._endNode); - + CAS_RESTART: { - START: - + // ........................................................................... + // To stop this loop CAS_RESTART becomming an infinite loop, use this check + // ........................................................................... + + if (casFailures == SKIPLIST_EX_CAS_FAILURES_MAX_LOOP) { + LOG_ERROR("CAS Failure"); + return NULL; + } + + + // ........................................................................... + // Provide a simple non-blocking, block? + // ........................................................................... + + if (casFailures > -1) { + assert(0); // a test to see why it blocks - should not block! + usleep(CAS_FAILURE_SLEEP_TIME); + } - // ......................................................................... - // Find the next node in the current level of the lists. - // ......................................................................... - prevNode = (TRI_skiplistEx_node_t*)(currentNode->_column[currentLevel]._prev); - - - - // ......................................................................... - // WE HAVE FOUR CASES TO CONSIDER - // ......................................................................... + + // ........................................................................... + // Increment the cas failures (which should always be hopefully 0). + // ........................................................................... + + ++casFailures; + + + // ........................................................................... + // Determine the starting level and the starting node + // ........................................................................... 
- // ......................................................................... - // CASE ONE: - // At this level we have the smallest (start) and largest (end) nodes ONLY. - // CASE TWO: - // We have arrived at the end of the nodes and we are not at the - // start of the nodes either. - // ......................................................................... + currentLevel = skiplist->_base._endNode._colLength - 1; + currentNode = &(skiplist->_base._endNode); + prevNode = currentNode; + + + START: { + - if (prevNode == &(skiplist->_base._startNode)) { - - // ....................................................................... - // We are at the lowest level of the lists, and we haven't found the item - // yet. Eventually we would like to return iterators. - // ....................................................................... - if (currentLevel == 0) { - return currentNode; + // ......................................................................... + // Find the next node in the current level of the lists. Protect yourself + // in case something has gone wrong. + // ......................................................................... + + if (prevNode == NULL) { + LOG_ERROR("CAS Failure"); + assert(0); + goto CAS_RESTART; } - // ....................................................................... - // We have not yet reached the lowest level continue down. - // ....................................................................... - --currentLevel; - goto START; - } - - - - // ......................................................................... - // CASE THREE: - // We are the smallest left most node and the NEXT node is NOT the end node. - // Compare this element with the element in the right node to see what we do. - // CASE FOUR: - // We are somewhere in the middle of a list, away from the smallest and - // largest nodes. - // ......................................................................... 
- - else { // nextNode != &(skiplist->_endNode - // ....................................................................... - // Use the callback to determine if the element is less or greater than - // the next node element. - // ....................................................................... - int compareResult = IndexStaticCompareKeyElement(skiplist, key, &(prevNode->_element), 1); + // ......................................................................... + // We require the successor of the current node so we can perform a + // comparison. It should never be null. + // ......................................................................... + + prevNode = (TRI_skiplistEx_node_t*)(prevNode->_column[currentLevel]._prev); + if (prevNode == NULL) { + LOG_ERROR("CAS Failure"); + assert(0); + goto CAS_RESTART; + } - // ....................................................................... - // If the number of fields (attributes) in the key is LESS than the number - // of fields in the element to be compared to, then EVEN if the keys which - // which are common to both equate as EQUAL, we STILL return 1 rather than - // 0! This ensures that the right interval end point is correctly positioned - // -- slightly inefficient since the lowest level skip list 0 has to be reached - // in this case. - // ....................................................................... + // ......................................................................... + // Is our next node a glass node? If so we must skip it! + // Note: since Garbage Collection is performed in TWO passes, it is possible + // that we have more than one glass node. + // ......................................................................... + if (prevNode->_towerFlag == TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG) { + goto START; + } - // ....................................................................... - // We have found the item! 
- // ....................................................................... - if (compareResult == 0) { - assert(false); - } - // ....................................................................... - // The key is greater than the next node element. Keep going on this - // level. - // ....................................................................... - if (compareResult < 0) { - currentNode = prevNode; + // ......................................................................... + // WE HAVE FOUR CASES TO CONSIDER + // ......................................................................... + + // ......................................................................... + // CASE ONE: + // At this level we have the smallest (start) and largest (end) nodes ONLY. + // CASE TWO: + // We have arrived at the end of the nodes and we are not at the + // start of the nodes either. + // ......................................................................... + + if (prevNode == &(skiplist->_base._startNode)) { + + + // ....................................................................... + // We are at the lowest level of the lists, and we haven't found the item + // yet. The currentNode does NOT compare and the next node is +\infinty. + // ....................................................................... + + if (currentLevel == 0) { + return currentNode; + } + + + // ....................................................................... + // We have not yet reached the lowest level continue down. Possibly our + // item we seek is to be found a lower level. + // ....................................................................... + + prevNode = currentNode; + --currentLevel; goto START; } - - // ....................................................................... - // We have reached the lowest level of the lists -- no such item. - // ....................................................................... 
- if (currentLevel == 0) { - return currentNode; - } + - // ....................................................................... - // Drop down the list - // ....................................................................... - --currentLevel; + // ......................................................................... + // CASE THREE: + // We are the smallest left most node and the NEXT node is NOT the end node. + // Compare this element with the element in the right node to see what we do. + // CASE FOUR: + // We are somewhere in the middle of a list, away from the smallest and + // largest nodes. + // ......................................................................... - goto START; - } - + else { // nextNode != &(skiplist->_endNode + int compareResult; - -// END: + // ....................................................................... + // Use the callback to determine if the element is less or greater than + // the next node element. We treat the comparison by assuming we are + // looking for a "key - epsilon". With this assumption we always find the + // last key to our right if it exists. The reason this is necessary is as + // follows: we allow a multiple documents with the same key to be stored + // here with the proviso that all but the last one is marked as deleted. + // This is how we cater for multiple revisions. + // ....................................................................... + + compareResult = IndexStaticCompareKeyElement(skiplist,key,&(prevNode->_element), 1); + + // ....................................................................... + // If the number of fields (attributes) in the key is LESS than the number + // of fields in the element to be compared to, then EVEN if the keys which + // which are common to both equate as EQUAL, we STILL return 1 rather than + // 0! 
This ensures that the right interval end point is correctly positioned + // -- slightly inefficient since the lowest level skip list 0 has to be reached + // in this case. + // ....................................................................... - assert(false); // there is no way we can be here + // ....................................................................... + // We have found the item! + // ....................................................................... + + if (compareResult == 0) { + assert(false); + } + + if (compareResult < 0) { + currentNode = prevNode; + goto START; + } + + + // ....................................................................... + // The element is less than the next node. Can we drop down the list? + // If have reached the lowest level of the lists -- no such item. + // ....................................................................... + + if (currentLevel == 0) { + return currentNode; + } + + + // ....................................................................... + // Drop down the list + // ....................................................................... + + --currentLevel; + prevNode = currentNode; + goto START; + } + + } // end of label START + + } // end of label CAS_RESTART + + assert(false); return NULL; } @@ -1580,6 +1859,9 @@ int TRI_InitSkipListExMulti (TRI_skiplistEx_multi_t* skiplist, size_t elementSiz // Assign the maximum height of the skip list. This maximum height must be // no greater than the absolute max height defined as a compile time parameter // .......................................................................... 
+ if (maximumHeight == 0) { + maximumHeight = SKIPLIST_EX_ABSOLUTE_MAX_HEIGHT; + } skiplist->_base._maxHeight = maximumHeight; if (maximumHeight > SKIPLIST_EX_ABSOLUTE_MAX_HEIGHT) { LOG_ERROR("Invalid maximum height for skiplist"); @@ -1695,7 +1977,7 @@ int TRI_InitSkipListExMulti (TRI_skiplistEx_multi_t* skiplist, size_t elementSiz // [N]<----------------------------------->[N] // [N]<----------------------------------->[N] // .......................................................................... - JoinStartEndNodes(&(skiplist->_base._startNode),&(skiplist->_base._endNode),0,1); // joins list 0 & 1 + JoinStartEndNodes(&(skiplist->_base._startNode),&(skiplist->_base._endNode),0, skiplist->_base._maxHeight - 1); // joins list 0 & 1 return TRI_ERROR_NO_ERROR; } @@ -1707,10 +1989,9 @@ int TRI_InitSkipListExMulti (TRI_skiplistEx_multi_t* skiplist, size_t elementSiz //////////////////////////////////////////////////////////////////////////////// void TRI_DestroySkipListExMulti (TRI_skiplistEx_multi_t* skiplist) { - if (skiplist == NULL) { - return; + if (skiplist != NULL) { + DestroyBaseSkipListEx( (TRI_skiplistEx_base_t*)(skiplist) ); } - DestroyBaseSkipListEx( (TRI_skiplistEx_base_t*)(skiplist) ); } @@ -1720,8 +2001,10 @@ void TRI_DestroySkipListExMulti (TRI_skiplistEx_multi_t* skiplist) { //////////////////////////////////////////////////////////////////////////////// void TRI_FreeSkipListExMulti (TRI_skiplistEx_multi_t* skiplist) { - TRI_DestroySkipListExMulti(skiplist); - TRI_Free(TRI_UNKNOWN_MEM_ZONE, skiplist); + if (skiplist != NULL) { + TRI_DestroySkipListExMulti(skiplist); + TRI_Free(TRI_UNKNOWN_MEM_ZONE, skiplist); + } } @@ -1757,6 +2040,350 @@ void* TRI_EndNodeSkipListExMulti(TRI_skiplistEx_multi_t* skiplist) { +//////////////////////////////////////////////////////////////////////////////// +/// @brief adds an element to a multi skip list using an element for searching +//////////////////////////////////////////////////////////////////////////////// + 
+int TRI_InsertElementSkipListExMulti(TRI_skiplistEx_multi_t* skiplist, + void* element, + bool overwrite, + uint64_t thisTransID) { + int32_t newHeight; + int32_t currentLevel; + TRI_skiplistEx_node_t* currentNode; + TRI_skiplistEx_node_t* nextNode; + TRI_skiplistEx_node_t* newNode; + int compareResult; + int result; + int casFailures = -1; + + + // ........................................................................... + // Just in case + // ........................................................................... + + if (skiplist == NULL) { + return TRI_ERROR_INTERNAL; + } + + + // ........................................................................... + // Determine the number of levels in which to add the item. That is, determine + // the height of the node so that it participates in that many lists. + // ........................................................................... + + newHeight = RandLevel(&(skiplist->_base)) + 1; + + // ........................................................................... + // Something wrong since the newHeight must be non-negative + // ........................................................................... + + if (newHeight < 1) { + return TRI_ERROR_INTERNAL; + } + + + // ........................................................................... + // Grow lists if required by increasing the height of the start and end nodes + // ........................................................................... + + result = GrowStartEndNodes(&(skiplist->_base), newHeight); + if (result != TRI_ERROR_NO_ERROR) { + return result; + } + + // ........................................................................... + // Create the new node to be inserted. If there is some sort of failure, + // then we delete the node memory. + // ........................................................................... 
+ newNode = TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_skiplistEx_node_t) + skiplist->_base._elementSize, false); + if (newNode == NULL) { // out of memory? + return TRI_ERROR_OUT_OF_MEMORY; + } + + + // ........................................................................... + // Copy the contents of element into the new node to be inserted. + // If a duplicate has been found, then we destroy the allocated memory. + // ........................................................................... + newNode->_column = NULL; + newNode->_colLength = 0; + newNode->_extraData = NULL; + + result = IndexStaticCopyElementElement(&(skiplist->_base), &(newNode->_element), element); + result = GrowNewNodeHeight(newNode, newHeight, newHeight, result); + if (result != TRI_ERROR_NO_ERROR) { + FreeSkipListExNode(&(skiplist->_base), newNode); + return result; + } + + // ........................................................................... + // Assign the deletion transaction id and the insertion transaction id + // ........................................................................... + + newNode->_delTransID = UINT64_MAX; // since we are inserting this new node it can not be deleted + newNode->_insTransID = thisTransID; // this is what was given to us + + + // ........................................................................... + // Big loop to restart the whole search routine + // ........................................................................... + + + CAS_RESTART: { + + // ........................................................................... + // To stop this loop CAS_RESTART becomming an infinite loop, use this check + // ........................................................................... 
+ + if (casFailures == SKIPLIST_EX_CAS_FAILURES_MAX_LOOP) { + LOG_ERROR("CAS Failure"); + FreeSkipListExNode(&(skiplist->_base), newNode); + return TRI_WARNING_ARANGO_INDEX_SKIPLIST_INSERT_CAS_FAILURE; + } + + + // ........................................................................... + // Provide a simple non-blocking, lock? The sleep time should be something + // needs to be adjusted. + // ........................................................................... + + if (casFailures > -1) { + usleep(CAS_FAILURE_SLEEP_TIME); + } + + + // ........................................................................... + // Increment the cas failures (which should always be hopefully 0). + // ........................................................................... + + ++casFailures; + + + // ........................................................................... + // Determine the path where the new item is to be inserted. If the item + // already exists either replace it or return false. Recall that this + // skip list is used for unique key/value pairs. Use the skiplist-multi + // non-unique key/value pairs. + // ........................................................................... + + currentLevel = skiplist->_base._startNode._colLength - 1; // NOT current height BUT current level is required here + currentNode = &(skiplist->_base._startNode); + nextNode = currentNode; + + + START: { + + + // ......................................................................... + // The current node (which we have called the nextNode below) should never + // be null. Protect yourself in case something has gone wrong. + // ......................................................................... + + if (nextNode == NULL) { + LOG_ERROR("CAS Failure"); + assert(0); + goto CAS_RESTART; + } + + + // ......................................................................... + // We require the successor of the current node so we can perform a + // comparison. 
It should never be null. + // ......................................................................... + + nextNode = (TRI_skiplistEx_node_t*)(nextNode->_column[currentLevel]._next); + if (nextNode == NULL) { + LOG_ERROR("CAS Failure"); + assert(0); + goto CAS_RESTART; + } + + + // ......................................................................... + // An insert/lookup/removal SEARCH like this, can ONLY ever find 1 glass + // node when we are very unlucky. (The GC makes the node glass and then + // goes and unlinks the pointers.) If we skip the glass node, then we + // will have the wrong pointers to compare, so we have to CAS_RESTART + // ......................................................................... + + if (nextNode->_towerFlag == TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG) { + goto CAS_RESTART; + } + + + // ......................................................................... + // WE HAVE FOUR CASES TO CONSIDER + // ......................................................................... + + // ......................................................................... + // CASE ONE: + // At this level we have the smallest (start) and largest (end) nodes ONLY. + // CASE TWO: + // We have arrived at the end of the nodes and we are not at the + // start of the nodes either. + // ......................................................................... + + if (nextNode == &(skiplist->_base._endNode)) { + + + // ....................................................................... + // Store the current node and level in the path + // ....................................................................... + + if (currentLevel < newHeight) { + newNode->_column[currentLevel]._prev = currentNode; + newNode->_column[currentLevel]._next = nextNode; + } + + // ....................................................................... 
+ // if we are at the lowest level of the lists, insert the item to the + // right of the current node + // ....................................................................... + if (currentLevel == 0) { + goto END; + } + + // ....................................................................... + // We have not yet reached the lowest level continue down. + // ....................................................................... + nextNode = currentNode; + --currentLevel; + + goto START; + } + + + + // ......................................................................... + // CASE THREE: + // We are the smallest left most node and the NEXT node is NOT the end node. + // Compare this element with the element in the right node to see what we do. + // CASE FOUR: + // We are somewhere in the middle of a list, away from the smallest and + // largest nodes. + // ......................................................................... + + else { // nextNode != &(skiplist->_endNode + + // ....................................................................... + // Use the callback to determine if the element is less or greater than + // the next node element. + // ....................................................................... + + compareResult = IndexStaticMultiCompareElementElement(skiplist, element, &(nextNode->_element), -1); + + + // ....................................................................... + // The element matches the next element. Overwrite if possible and return. + // The only possiblity of obtaining a compareResult equal to 0 is for the + // the element being the same, NOT the keys being the same. + // ....................................................................... + + if (compareResult == 0) { + FreeSkipListExNode(&(skiplist->_base), newNode); + if (overwrite) { + // ................................................................... 
+ // Warning: there is NO check to ensure that this node has not been + // previously deleted. + // ................................................................... + result = IndexStaticCopyElementElement(&(skiplist->_base), &(nextNode->_element), element); + return result; + } + return TRI_ERROR_ARANGO_INDEX_SKIPLIST_INSERT_ITEM_DUPLICATED; + } + + // ....................................................................... + // The element to be inserted has a key which is greater than the next node's + // element key. Keep going on this level. + // ....................................................................... + + if (compareResult > 0) { + currentNode = nextNode; + goto START; + } + + + // ....................................................................... + // The element is less than the next node. Can we drop down the list? + // Store the current node and level in the path. + // ....................................................................... + + if (currentLevel < newHeight) { + newNode->_column[currentLevel]._prev = currentNode; + newNode->_column[currentLevel]._next = nextNode; + } + + + // ....................................................................... + // We have reached the lowest level of the lists. Time to insert item. + // Note that we will insert this item to the left of all the items with + // the same key. Note also that the higher transaction numbers are to + // the left always. + // ....................................................................... + + if (currentLevel == 0) { + goto END; + } + + + // ....................................................................... + // Drop down the list + // ....................................................................... + + nextNode = currentNode; + --currentLevel; + + goto START; + } + + } // end of label START + + } // end of label CAS_RESTART + + + END: { + + // .......................................................................... 
+ // Ok finished with the loop and we should have a path with AT MOST + // SKIPLIST_EX_ABSOLUTE_MAX_HEIGHT number of elements. + // .......................................................................... + + + // .......................................................................... + // this is the tricky part since we have to attempt to do this as + // 'lock-free' as possible. This is acheived in three passes: + // Pass 1: Mark each prev and next node of the new node so that the GC + // can not modify it. If this fails goto CAS_RESTART + // Pass 2: Ensure that each prev and next tower is not glassed. + // Pass 3: Modify the newnode.prev.next to newnode and newnode.next.prev = newnode + // .......................................................................... + + result = JoinNewNodeCas(newNode); + if (result == TRI_WARNING_ARANGO_INDEX_SKIPLIST_INSERT_CAS_FAILURE) { + goto CAS_RESTART; + } + return result; + + } // end of END label + + return TRI_ERROR_NO_ERROR; +} + + + +//////////////////////////////////////////////////////////////////////////////// +/// @brief adds an key/element to a multi skip list +//////////////////////////////////////////////////////////////////////////////// + +int TRI_InsertKeySkipListExMulti(TRI_skiplistEx_multi_t* skiplist, void* key, void* element, bool overwrite, uint64_t thisTransID) { + // Use TRI_InsertElementSkipListExMulti instead of calling this method + assert(false); + return 0; +} + + + //////////////////////////////////////////////////////////////////////////////// /// @brief returns greatest node less than a given key //////////////////////////////////////////////////////////////////////////////// @@ -1765,124 +2392,206 @@ void* TRI_LeftLookupByKeySkipListExMulti(TRI_skiplistEx_multi_t* skiplist, void* int32_t currentLevel; TRI_skiplistEx_node_t* currentNode; TRI_skiplistEx_node_t* nextNode; - + int casFailures = -1; + // ........................................................................... 
- // Just in case + // Just in case ... // ........................................................................... if (skiplist == NULL) { + LOG_ERROR("Internal Error"); return NULL; } - + // ........................................................................... - // Determine the starting level and the starting node + // Big loop to restart the whole search routine // ........................................................................... - currentLevel = skiplist->_base._startNode._colLength - 1; - currentNode = &(skiplist->_base._startNode); - + CAS_RESTART: { - START: - + // ........................................................................... + // To stop this loop CAS_RESTART becomming an infinite loop, use this check + // ........................................................................... + + if (casFailures == SKIPLIST_EX_CAS_FAILURES_MAX_LOOP) { + LOG_ERROR("CAS Failure"); + return NULL; + } + + + // ........................................................................... + // Provide a simple non-blocking, block? + // ........................................................................... + + if (casFailures > -1) { + assert(0); // a test to see why it blocks - should not block! + usleep(CAS_FAILURE_SLEEP_TIME); + } - // ......................................................................... - // Find the next node in the current level of the lists. - // ......................................................................... - nextNode = (TRI_skiplistEx_node_t*)(currentNode->_column[currentLevel]._next); + + // ........................................................................... + // Increment the cas failures (which should always be hopefully 0). + // ........................................................................... + + ++casFailures; + + + // ........................................................................... 
+ // Determine the starting level and the starting node + // ........................................................................... + + currentLevel = skiplist->_base._startNode._colLength - 1; + currentNode = &(skiplist->_base._startNode); + nextNode = currentNode; + + + START: { - - - // ......................................................................... - // WE HAVE FOUR CASES TO CONSIDER - // ......................................................................... - // ......................................................................... - // CASE ONE: - // At this level we have the smallest (start) and largest (end) nodes ONLY. - // CASE TWO: - // We have arrived at the end of the nodes and we are not at the - // start of the nodes either. - // ......................................................................... - - if (nextNode == &(skiplist->_base._endNode)) { - - - // ....................................................................... - // We are at the lowest level of the lists, and we haven't found the item - // yet. Eventually we would like to return iterators. - // ....................................................................... - if (currentLevel == 0) { - return currentNode; + // ......................................................................... + // Find the next node in the current level of the lists. Protect yourself + // in case something has gone wrong. + // ......................................................................... + + if (nextNode == NULL) { + LOG_ERROR("CAS Failure"); + assert(0); + goto CAS_RESTART; } - // ....................................................................... - // We have not yet reached the lowest level continue down. - // ....................................................................... - --currentLevel; - goto START; - } - - - - // ......................................................................... 
- // CASE THREE: - // We are the smallest left most node and the NEXT node is NOT the end node. - // Compare this element with the element in the right node to see what we do. - // CASE FOUR: - // We are somewhere in the middle of a list, away from the smallest and - // largest nodes. - // ......................................................................... - - else { // nextNode != &(skiplist->_endNode - // ....................................................................... - // Use the callback to determine if the element is less or greater than - // the next node element. - // ....................................................................... - int compareResult = IndexStaticMultiCompareKeyElement(skiplist,key,&(nextNode->_element), -1); + // ......................................................................... + // We require the successor of the current node so we can perform a + // comparison. It should never be null. + // ......................................................................... - - // ....................................................................... - // We have found the item! Not possible - // ....................................................................... - if (compareResult == 0) { - //return &(nextNode->_element); - //return currentNode; - assert(false); - return (void*)(nextNode->_column[0]._prev); + nextNode = (TRI_skiplistEx_node_t*)(nextNode->_column[currentLevel]._next); + if (nextNode == NULL) { + LOG_ERROR("CAS Failure"); + assert(0); + goto CAS_RESTART; } - // ....................................................................... - // The element is greater than the next node element. Keep going on this - // level. - // ....................................................................... - if (compareResult > 0) { - currentNode = nextNode; + + // ......................................................................... + // Is our next node a glass node? If so we must skip it! 
+ // Note: since Garbage Collection is performed in TWO passes, it is possible + // that we have more than one glass node. + // ......................................................................... + + if (nextNode->_towerFlag == TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG) { + goto START; + } + + + // ......................................................................... + // WE HAVE FOUR CASES TO CONSIDER + // ......................................................................... + + // ......................................................................... + // CASE ONE: + // At this level we have the smallest (start) and largest (end) nodes ONLY. + // CASE TWO: + // We have arrived at the end of the nodes and we are not at the + // start of the nodes either. + // ......................................................................... + + if (nextNode == &(skiplist->_base._endNode)) { + + + // ....................................................................... + // We are at the lowest level of the lists, and we haven't found the item + // yet. The currentNode does NOT compare and the next node is +\infinty. + // ....................................................................... + + if (currentLevel == 0) { + return currentNode; + } + + + // ....................................................................... + // We have not yet reached the lowest level continue down. Possibly our + // item we seek is to be found a lower level. + // ....................................................................... + + nextNode = currentNode; + --currentLevel; goto START; } - - // ....................................................................... - // We have reached the lowest level of the lists -- no such item. - // ....................................................................... - if (currentLevel == 0) { - return currentNode; - } + - // ....................................................................... 
- // Drop down the list - // ....................................................................... - --currentLevel; + // ......................................................................... + // CASE THREE: + // We are the smallest left most node and the NEXT node is NOT the end node. + // Compare this element with the element in the right node to see what we do. + // CASE FOUR: + // We are somewhere in the middle of a list, away from the smallest and + // largest nodes. + // ......................................................................... - goto START; - } - + else { // nextNode != &(skiplist->_endNode + int compareResult; - -// END: + // ....................................................................... + // Use the callback to determine if the element is less or greater than + // the next node element. We treat the comparison by assuming we are + // looking for a "key - epsilon". With this assumption we always find the + // last key to our right if it exists. The reason this is necessary is as + // follows: we allow a multiple documents with the same key to be stored + // here with the proviso that all but the last one is marked as deleted. + // This is how we cater for multiple revisions. + // ....................................................................... + + compareResult = IndexStaticMultiCompareKeyElement(skiplist, key, &(nextNode->_element), -1); + + // ....................................................................... + // -1 is returned if the number of fields (attributes) in the key is LESS + // than the number of fields in the index definition. This has the effect + // of being slightly less efficient since we have to proceed to the level + // 0 list in the set of skip lists. Where we allow duplicates such as this + // -1 is also returned when all the keys match. + // ....................................................................... 
- assert(false); // there is no way we can be here + // ....................................................................... + // We have found the item! + // ....................................................................... + + if (compareResult == 0) { + assert(false); + } + + if (compareResult > 0) { + currentNode = nextNode; + goto START; + } + + + // ....................................................................... + // The element is less than the next node. Can we drop down the list? + // If have reached the lowest level of the lists -- no such item. + // ....................................................................... + + if (currentLevel == 0) { + return currentNode; + } + + + // ....................................................................... + // Drop down the list + // ....................................................................... + + --currentLevel; + nextNode = currentNode; + goto START; + } + + } // end of label START + + } // end of label CAS_RESTART + + assert(false); return NULL; } @@ -1904,265 +2613,17 @@ void* TRI_LookupByElementSkipListExMulti(TRI_skiplistEx_multi_t* skiplist, void* //////////////////////////////////////////////////////////////////////////////// void* TRI_LookupByKeySkipListExMulti(TRI_skiplistEx_multi_t* skiplist, void* key, uint64_t thisTransID) { + // Since this index supports duplicate keys, it makes no sense to lookup an element in the index + // using a key - if there are such elements - what is returned is undefined (in the sense that a valid + // element is returned but which one?). Hence lookups can only really make sense to say give me the + // first such element and the last such element, so that we can traverse the elements which match the + // keys. 
assert(false); // there is no way you should be here return 0; } -//////////////////////////////////////////////////////////////////////////////// -/// @brief adds an element to a multi skip list using an element for searching -//////////////////////////////////////////////////////////////////////////////// - -int TRI_InsertElementSkipListExMulti(TRI_skiplistEx_multi_t* skiplist, void* element, bool overwrite, uint64_t thisTransID) { - //This uses the compareElementElement callback - int32_t newHeight; - int32_t currentLevel; - TRI_skiplistEx_node_t* currentNode; - TRI_skiplistEx_node_t* nextNode; - TRI_skiplistEx_node_t* newNode; - //TRI_skiplistEx_node_t* tempLeftNode; - //TRI_skiplistEx_node_t* tempRightNode; - int compareResult; - int j; - - // ........................................................................... - // Just in case - // ........................................................................... - - if (skiplist == NULL) { - return TRI_ERROR_INTERNAL; - } - - - // ........................................................................... - // Determine the number of levels in which to add the item. That is, determine - // the height of the node so that it participates in that many lists. - // ........................................................................... - - newHeight = RandLevel(&(skiplist->_base)); - - // ........................................................................... - // Something wrong since the newHeight must be non-negative - // ........................................................................... - - if (newHeight < 0) { - return TRI_ERROR_INTERNAL; - } - - // ........................................................................... - // convert the level to a height - // ........................................................................... - newHeight += 1; - - - // ........................................................................... 
- // Grow lists if required by increasing the height of the start and end nodes - // ........................................................................... - /* oreste fix me: - oldColLength = skiplist->_base._startNode._colLength; - if ((uint32_t)(newHeight) > oldColLength) { - - growResult = GrowNodeHeight(&(skiplist->_base._startNode), newHeight); - growResult = growResult && GrowNodeHeight(&(skiplist->_base._endNode), newHeight); - if (!growResult) { - // todo: truncate the nodes and return; - return TRI_ERROR_OUT_OF_MEMORY; - } - // oreste to fix: JoinXXNodesCAS(&(skiplist->_base._startNode),&(skiplist->_base._endNode), oldColLength , newHeight - 1); - } - */ - - // ........................................................................... - // Create the new node to be inserted. If there is some sort of failure, - // then we delete the node memory. - // ........................................................................... - newNode = TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_skiplistEx_node_t) + skiplist->_base._elementSize, false); - if (newNode == NULL) { // out of memory? - return TRI_ERROR_OUT_OF_MEMORY; - } - - - // ........................................................................... - // Copy the contents of element into the new node to be inserted. - // If a duplicate has been found, then we destroy the allocated memory. - // ........................................................................... - newNode->_column = NULL; - newNode->_colLength = 0; - newNode->_extraData = NULL; - - j = IndexStaticCopyElementElement(&(skiplist->_base), &(newNode->_element),element); - - if (j != TRI_ERROR_NO_ERROR) { - return j; - } - - /* oreste fix me: - growResult = GrowNodeHeight(newNode, newHeight); - if (!growResult) { - FreeSkipListExNode(&(skiplist->_base), newNode); - return TRI_ERROR_OUT_OF_MEMORY; - } - */ - - // ........................................................................... 
- // Determine the path where the new item is to be inserted. If the item - // already exists either replace it or return false. Recall that this - // skip list is used for unique key/value pairs. Use the skiplist-multi - // non-unique key/value pairs. - // ........................................................................... - currentLevel = skiplist->_base._startNode._colLength - 1; // NOT current height BUT current level is required here - currentNode = &(skiplist->_base._startNode); - - - START: - - - // ......................................................................... - // Find the next node in the current level of the lists. - // ......................................................................... - nextNode = (TRI_skiplistEx_node_t*)(currentNode->_column[currentLevel]._next); - - - // ......................................................................... - // WE HAVE FOUR CASES TO CONSIDER - // ......................................................................... - - // ......................................................................... - // CASE ONE: - // At this level we have the smallest (start) and largest (end) nodes ONLY. - // CASE TWO: - // We have arrived at the end of the nodes and we are not at the - // start of the nodes either. - // ......................................................................... - - if (nextNode == &(skiplist->_base._endNode)) { - - - // ....................................................................... - // Store the current node and level in the path - // ....................................................................... - if (currentLevel < newHeight) { - newNode->_column[currentLevel]._prev = currentNode; - } - - // ....................................................................... - // if we are at the lowest level of the lists, insert the item to the - // right of the current node - // ....................................................................... 
- if (currentLevel == 0) { - goto END; - } - - // ....................................................................... - // We have not yet reached the lowest level continue down. - // ....................................................................... - --currentLevel; - - goto START; - } - - - - // ......................................................................... - // CASE THREE: - // We are the smallest left most node and the NEXT node is NOT the end node. - // Compare this element with the element in the right node to see what we do. - // CASE FOUR: - // We are somewhere in the middle of a list, away from the smallest and - // largest nodes. - // ......................................................................... - - else { // nextNode != &(skiplist->_endNode - - // ....................................................................... - // Use the callback to determine if the element is less or greater than - // the next node element. - // ....................................................................... - compareResult = IndexStaticMultiCompareElementElement(skiplist,element,&(nextNode->_element), -1); - - - // ....................................................................... - // The element matches the next element. Overwrite if possible and return. - // We do not allow non-unique elements (non-unique 'keys' ok). - // ....................................................................... - if (compareResult == 0) { - FreeSkipListExNode(&(skiplist->_base), newNode); - if (overwrite) { - j = IndexStaticCopyElementElement(&(skiplist->_base), &(nextNode->_element),element); - return j; - } - return TRI_ERROR_ARANGO_INDEX_SKIPLIST_INSERT_ITEM_DUPLICATED; - } - - // ....................................................................... - // The element is greater than the next node element. Keep going on this - // level. - // ....................................................................... 
- if (compareResult > 0) { - currentNode = nextNode; - goto START; - } - - - // ....................................................................... - // The element is less than the next node. Can we drop down the list? - // Store the current node and level in the path. - // ....................................................................... - if (currentLevel < newHeight) { - newNode->_column[currentLevel]._prev = currentNode; - } - - // ....................................................................... - // We have reached the lowest level of the lists. Time to insert item. - // ....................................................................... - if (currentLevel == 0) { - goto END; - } - - // ....................................................................... - // Drop down the list - // ....................................................................... - --currentLevel; - - goto START; - } - - - - END: - - // .......................................................................... - // Ok finished with the loop and we should have a path with AT MOST - // SKIPLIST_ABSOLUTE_MAX_HEIGHT number of elements. - // .......................................................................... 
- - /* fix me oreste - for (j = 0; j < newHeight; ++j) { - tempLeftNode = newNode->_column[j]._prev; - tempRightNode = tempLeftNode->_column[j]._next; - // oreste to fix: JoinXXNodesCAS(tempLeftNode, newNode, j, j); - // oreste to fix: JoinXXNodesCAS(newNode, tempRightNode, j, j); - } -*/ - - return TRI_ERROR_NO_ERROR; -} - - - -//////////////////////////////////////////////////////////////////////////////// -/// @brief adds an key/element to a multi skip list -//////////////////////////////////////////////////////////////////////////////// - -int TRI_InsertKeySkipListExMulti(TRI_skiplistEx_multi_t* skiplist, void* key, void* element, bool overwrite, uint64_t thisTransID) { - // Use TRI_InsertelementSkipListEx instead of calling this method - assert(false); - return 0; -} - - - //////////////////////////////////////////////////////////////////////////////// /// @brief given a node returns the next node (if possible) in the skiplist //////////////////////////////////////////////////////////////////////////////// @@ -2196,186 +2657,366 @@ void* TRI_PrevNodeSkipListExMulti(TRI_skiplistEx_multi_t* skiplist, void* curren int TRI_RemoveElementSkipListExMulti (TRI_skiplistEx_multi_t* skiplist, void* element, void* old, - const int passLevel, const uint64_t thisTransID) { - + const int passLevel, const uint64_t thisTransID, + TRI_skiplistEx_node_t** passNode) { + // ........................................................................... + // To remove an element from this skip list we have three pass levels: + // Pass 1: locate (if possible) the exact NODE - must match exactly. + // Once located, add the transaction id to the node. Return. + // Pass 2: locate the node (if not possible report error) - must match exactly. + // Once located, attempt to unlink all the pointers and make the + // node a Glass Node. + // Pass 3: Excise the node by destroying it's allocated memory. + // ........................................................................... 
+ int32_t currentLevel; - TRI_skiplistEx_node_t* currentNode; - TRI_skiplistEx_node_t* nextNode; - //TRI_skiplistEx_node_t* tempLeftNode; - //TRI_skiplistEx_node_t* tempRightNode; - int compareResult; - //unsigned int j; + TRI_skiplistEx_node_t* currentNode = NULL; + TRI_skiplistEx_node_t* nextNode = NULL; + int casFailures = -1; // ........................................................................... // Just in case // ........................................................................... if (skiplist == NULL) { + LOG_ERROR("Internal Error"); return TRI_ERROR_INTERNAL; } // ........................................................................... - // Start at the top most list and left most position of that list. + // Only for pass level 1 do we have a requirement to locate the actual node + // using the key. For pass levels 2 & 3 we have the pointer to the node. // ........................................................................... - currentLevel = skiplist->_base._startNode._colLength - 1; // current level not height - currentNode = &(skiplist->_base._startNode); - - START: - - // ......................................................................... - // Find the next node in the current level of the lists. - // ......................................................................... - nextNode = (TRI_skiplistEx_node_t*)(currentNode->_column[currentLevel]._next); + if (passLevel != 1) { goto END; } + + // ........................................................................... + // Big loop to restart the whole search routine + // ........................................................................... + + CAS_RESTART: { + + // ........................................................................... + // To stop this loop CAS_RESTART becomming an infinite loop, use this check + // ........................................................................... 
+ + if (casFailures == SKIPLIST_EX_CAS_FAILURES_MAX_LOOP) { + LOG_ERROR("CAS Failure"); + return TRI_WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_CAS_FAILURE; + } + + + // ........................................................................... + // Provide a simple non-blocking, block? + // ........................................................................... + + if (casFailures > -1) { + usleep(CAS_FAILURE_SLEEP_TIME); + } + + + // ........................................................................... + // Increment the cas failures (which should always be hopefully 0). + // ........................................................................... + + ++casFailures; + + + // ........................................................................... + // Determine the starting level and the starting node + // ........................................................................... + + currentLevel = skiplist->_base._startNode._colLength - 1; + currentNode = &(skiplist->_base._startNode); + nextNode = currentNode; + + + START: { - - // ......................................................................... - // WE HAVE FOUR CASES TO CONSIDER - // ......................................................................... - - // ......................................................................... - // CASE ONE: - // At this level we have the smallest (start) and largest (end) nodes ONLY. - // CASE TWO: - // We have arrived at the end of the nodes and we are not at the - // start of the nodes either. - // ......................................................................... - - if (nextNode == &(skiplist->_base._endNode)) { - - // ....................................................................... - // We are at the lowest level of the lists, and we haven't found the item - // yet. Nothing to remove so return. - // ....................................................................... 
- if (currentLevel == 0) { - return TRI_WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_MISSING; + // ......................................................................... + // Find the next node in the current level of the lists. Protect yourself + // in case something has gone wrong. + // ......................................................................... + + if (nextNode == NULL) { + LOG_ERROR("CAS Failure"); + assert(0); + goto CAS_RESTART; } - // ....................................................................... - // We have not yet reached the lowest level continue down. - // ....................................................................... - --currentLevel; - goto START; - } - - - - // ......................................................................... - // CASE THREE: - // We are the smallest left most node and the NEXT node is NOT the end node. - // Compare this element with the element in the right node to see what we do. - // CASE FOUR: - // We are somewhere in the middle of a list, away from the smallest and - // largest nodes. - // ......................................................................... - - else { // nextNode != &(skiplist->_endNode - - // ....................................................................... - // Use the callback to determine if the element is less or greater than - // the next node element. - // ....................................................................... - compareResult = IndexStaticMultiCompareElementElement(skiplist,element,&(nextNode->_element), TRI_SKIPLIST_EX_COMPARE_SLIGHTLY_LESS); + // ......................................................................... + // We require the successor of the current node so we can perform a + // comparison. It should never be null. + // ......................................................................... - // ....................................................................... 
- // We have found an item which matches the key - // ....................................................................... - if (compareResult == TRI_SKIPLIST_EX_COMPARE_STRICTLY_EQUAL) { - currentNode = nextNode; - goto END; + nextNode = (TRI_skiplistEx_node_t*)(nextNode->_column[currentLevel]._next); + if (nextNode == NULL) { + LOG_ERROR("CAS Failure"); + assert(0); + goto CAS_RESTART; } - - // ....................................................................... - // The element is greater than the next node element. Keep going on this - // level. - // ....................................................................... - if (compareResult > 0) { - currentNode = nextNode; + + // ......................................................................... + // Is our next node a glass node? If so we must skip it. Recall we are in + // Phase I here -- meaning that we are searching for a node which has not + // be removed and previously inserted. + // ......................................................................... + + if (nextNode->_towerFlag == TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG) { + goto START; + } + + + // ......................................................................... + // WE HAVE FOUR CASES TO CONSIDER + // ......................................................................... + + // ......................................................................... + // CASE ONE: + // At this level we have the smallest (start) and largest (end) nodes ONLY. + // CASE TWO: + // We have arrived at the end of the nodes and we are not at the + // start of the nodes either. + // ......................................................................... + + if (nextNode == &(skiplist->_base._endNode)) { + + // ....................................................................... + // We are at the lowest level of the lists, and we haven't found the item + // yet. Nothing to remove so return. 
+ // ....................................................................... + if (currentLevel == 0) { + return TRI_WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_MISSING; + } + + // ....................................................................... + // We have not yet reached the lowest level continue down. + // ....................................................................... + + nextNode = currentNode; + --currentLevel; + goto START; } - - - // ....................................................................... - // We have reached the lowest level of the lists -- no such item. - // ....................................................................... - if (currentLevel == 0) { + + - // ..................................................................... - // The element could not be located - // ..................................................................... - if (compareResult == TRI_SKIPLIST_EX_COMPARE_STRICTLY_LESS) { - return TRI_WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_MISSING; + // ......................................................................... + // CASE THREE: + // We are the smallest left most node and the NEXT node is NOT the end node. + // Compare this element with the element in the right node to see what we do. + // CASE FOUR: + // We are somewhere in the middle of a list, away from the smallest and + // largest nodes. + // ......................................................................... + + else { // nextNode != &(skiplist->_endNode + int compareResult; + + // ....................................................................... + // Use the callback to determine if the element is less or greater than + // the next node element. + // ....................................................................... + + compareResult = IndexStaticMultiCompareElementElement(skiplist,element,&(nextNode->_element), -1); + + + // ....................................................................... 
+ // The element is greater than the next node element. Keep going on this + // level. + // ....................................................................... + if (compareResult > 0) { + currentNode = nextNode; + goto START; + } + + + if (compareResult == 0) { // a node matches exactly based upon the element + + if (nextNode->_delTransID > thisTransID) { + + // ................................................................... + // Node has NOT been deleted (e.g. imagine it will be deleted some + // time in the future). This is the node we want. + // ................................................................... + + currentNode = nextNode; + goto END; + } + + + // ..................................................................... + // In a skiplist supporting duplicate entries, the comparison function + // test ensures the elements are the same (e.g. same address in memory) + // it can never be the case that we rely simply on the keys matching. + // So the question remains: why has the item has been previously + // deleted? Has someone tried to remove this item twice? + // Don't know return error. + // ..................................................................... + + return TRI_WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_PRIOR_REMOVED; } - // ..................................................................... - // The element could be located (by matching the key) and we are at the lowest level - // ..................................................................... - if (compareResult == TRI_SKIPLIST_EX_COMPARE_SLIGHTLY_LESS) { - goto END; + + // ....................................................................... + // We have reached the lowest level of the lists -- no such item. + // ....................................................................... 
+ if (currentLevel == 0) { + return TRI_WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_MISSING; } - // can not occur - assert(false); + // ....................................................................... + // Drop down the list + // ....................................................................... + --currentLevel; + nextNode = currentNode; + goto START; + } + + } // end of START label + } // end of CAS_RESTART label + + END: { + + switch (passLevel) { + + // ......................................................................... + // In this case we simply add the del transaction id with a CAS statement. + // It should never fail! + // ......................................................................... + + case 1: { + bool ok; + + if (currentNode == NULL) { // something terribly wrong + assert(0); + return TRI_ERROR_INTERNAL; + } + + ok = TRI_CompareAndSwapIntegerUInt64 (&(currentNode->_delTransID), + UINT64_MAX, thisTransID); + if (!ok) { + assert(0); + return TRI_ERROR_INTERNAL; + } + // .................................................................... + // If requested copy the contents of the element we have located into the + // storage sent. + // .................................................................... + + if (old != NULL) { + IndexStaticCopyElementElement(&(skiplist->_base), old, &(currentNode->_element)); + } + + *passNode = currentNode; + + return TRI_ERROR_NO_ERROR; } - // ....................................................................... - // Drop down the list - // ....................................................................... - --currentLevel; - goto START; - } + // ......................................................................... + // In this case we wish to make the node a glass node and to unjoin all + // other connected nodes. + // ......................................................................... + case 2: { + + // ....................................................................... 
+ // We can not now rely upon looking up the node using the key, since + // we would need to traverse right and attempt to match either the + // transaction id and/or the pointer to the doc. Easier to simply + // send the address of the node back. + // ....................................................................... + if (*passNode == NULL) { + return TRI_ERROR_INTERNAL; + } + currentNode = (TRI_skiplistEx_node_t*)(*passNode); + + + // ....................................................................... + // Only the Garbage Collector can transform a node into a glass node, and + // since the GC is only operating in one thread, it is safe to do a simple + // comparison here. + // ....................................................................... + if (currentNode->_towerFlag == TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG) { + return TRI_ERROR_INTERNAL; + } + + // ....................................................................... + // safety check + // ....................................................................... + if (currentNode->_delTransID != thisTransID) { + return TRI_ERROR_INTERNAL; + } + + // ....................................................................... + // The strategy is this: + // (a) Brick each nearest neighbour on this node. This ensures that NO + // other nodes can be attached to this node. + // (b) Mark this node as being glass. This ensures that it is skipped + // since it is no longer required in the index. + // (c) Unbrick each of its nearest neighbours on this node. This ensures + // that an inserted node MAY be allowed to be attached but will later fail. + // Also allows us to brick other glass nodes. + // (d) Brick each prev and next nearest neighbour of this node. Irrespective + // if one of these are glass or not. This ensures that lookups can + // proceed unhindered. + // (e) Unjoin the node from the list. 
+ // (f) Unbrick each prev/next nearest neighbour + // ....................................................................... + + return UnJoinOldNodeCas(currentNode); + } + + + // ......................................................................... + // In this case since no other reader/writer can be accessing the node, + // we simply destroy it. We require the node to be glass. + // ......................................................................... + case 3: { + // ....................................................................... + // We can not now rely upon looking up the node using the key, since + // we would need to traverse right and attempt to match either the + // transaction id and/or the pointer to the doc. Easier to simply + // send the address of the node back. + // ....................................................................... + if (*passNode == NULL) { + return TRI_ERROR_INTERNAL; + } + currentNode = (TRI_skiplistEx_node_t*)(*passNode); + + // ....................................................................... + // Only the Garbage Collector can transform a node into a glass node, and + // since the GC is only operating in one thread, it is safe to do a simple + // comparison here. + // ....................................................................... + if (currentNode->_towerFlag != TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG) { + return TRI_ERROR_INTERNAL; + } + + // ....................................................................... + // safety check + // ....................................................................... + if (currentNode->_delTransID != thisTransID) { + return TRI_ERROR_INTERNAL; + } + + FreeSkipListExNode(&(skiplist->_base), currentNode); + + break; + + } + + default: { + assert(0); + return TRI_ERROR_INTERNAL; + } + + } // end of switch statement - - - END: - - // .......................................................................... 
- // locate the correct elemet -- since we allow duplicates - // .......................................................................... - - while (currentNode != NULL) { - if (IndexStaticMultiEqualElementElement(skiplist, element, &(currentNode->_element))) { - break; - } - currentNode = NextNodeBaseSkipListEx(&(skiplist->_base), currentNode, thisTransID); - } - - - // .......................................................................... - // The actual element could not be located - an element with a matching key - // may exist, but the same data stored within the element could not be located - // .......................................................................... - - if (currentNode == NULL) { - return TRI_WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_MISSING; - } - - - // .......................................................................... - // Perhaps the user wants a copy before we destory the data? - // .......................................................................... - - if (old != NULL) { - IndexStaticCopyElementElement(&(skiplist->_base), old, &(currentNode->_element)); - } - - - // .......................................................................... - // remove element - // .......................................................................... 
- - /* fix me oreste - for (j = 0; j < currentNode->_colLength; ++j) { - tempLeftNode = currentNode->_column[j]._prev; - tempRightNode = currentNode->_column[j]._next; - // oreste to fix: JoinXXNodesCAS(tempLeftNode, tempRightNode, j, j); - } -*/ - FreeSkipListExNode(&(skiplist->_base), currentNode); + } // end of END label return TRI_ERROR_NO_ERROR; @@ -2388,7 +3029,8 @@ int TRI_RemoveElementSkipListExMulti (TRI_skiplistEx_multi_t* skiplist, void* el //////////////////////////////////////////////////////////////////////////////// int TRI_RemoveKeySkipListExMulti(TRI_skiplistEx_multi_t* skiplist, void* key, void* old, - const int passLevel, const uint64_t thisTransID) { + const int passLevel, const uint64_t thisTransID, + TRI_skiplistEx_node_t** passNode) { // Use the TRI_RemoveElementSkipListExMulti method instead. assert(false); return 0; @@ -2404,120 +3046,207 @@ void* TRI_RightLookupByKeySkipListExMulti(TRI_skiplistEx_multi_t* skiplist, void int32_t currentLevel; TRI_skiplistEx_node_t* currentNode; TRI_skiplistEx_node_t* prevNode; + int casFailures = -1; // ........................................................................... - // Just in case + // Just in case ... // ........................................................................... if (skiplist == NULL) { + LOG_ERROR("Internal Error"); return NULL; } - + // ........................................................................... - // Determine the starting level and the starting node + // Big loop to restart the whole search routine // ........................................................................... - currentLevel = skiplist->_base._startNode._colLength - 1; - currentNode = &(skiplist->_base._endNode); - + CAS_RESTART: { - START: - + // ........................................................................... + // To stop this loop CAS_RESTART becomming an infinite loop, use this check + // ........................................................................... 
+ + if (casFailures == SKIPLIST_EX_CAS_FAILURES_MAX_LOOP) { + LOG_ERROR("CAS Failure"); + return NULL; + } + + + // ........................................................................... + // Provide a simple non-blocking, block? + // ........................................................................... + + if (casFailures > -1) { + assert(0); // a test to see why it blocks - should not block! + usleep(CAS_FAILURE_SLEEP_TIME); + } - // ......................................................................... - // Find the next node in the current level of the lists. - // ......................................................................... - prevNode = (TRI_skiplistEx_node_t*)(currentNode->_column[currentLevel]._prev); - - - - // ......................................................................... - // WE HAVE FOUR CASES TO CONSIDER - // ......................................................................... + + // ........................................................................... + // Increment the cas failures (which should always be hopefully 0). + // ........................................................................... + + ++casFailures; + + + // ........................................................................... + // Determine the starting level and the starting node + // ........................................................................... - // ......................................................................... - // CASE ONE: - // At this level we have the smallest (start) and largest (end) nodes ONLY. - // CASE TWO: - // We have arrived at the end of the nodes and we are not at the - // start of the nodes either. - // ......................................................................... 
+ currentLevel = skiplist->_base._endNode._colLength - 1; + currentNode = &(skiplist->_base._endNode); + prevNode = currentNode; + + + START: { + - if (prevNode == &(skiplist->_base._startNode)) { - - // ....................................................................... - // We are at the lowest level of the lists, and we haven't found the item - // yet. Eventually we would like to return iterators. - // ....................................................................... - if (currentLevel == 0) { - return currentNode; + // ......................................................................... + // Find the next node in the current level of the lists. Protect yourself + // in case something has gone wrong. + // ......................................................................... + + if (prevNode == NULL) { + LOG_ERROR("CAS Failure"); + assert(0); + goto CAS_RESTART; } - // ....................................................................... - // We have not yet reached the lowest level continue down. - // ....................................................................... - --currentLevel; - goto START; - } - - - - // ......................................................................... - // CASE THREE: - // We are the smallest left most node and the NEXT node is NOT the end node. - // Compare this element with the element in the right node to see what we do. - // CASE FOUR: - // We are somewhere in the middle of a list, away from the smallest and - // largest nodes. - // ......................................................................... - - else { // nextNode != &(skiplist->_endNode - // ....................................................................... - // Use the callback to determine if the element is less or greater than - // the next node element. - // ....................................................................... 
- int compareResult = IndexStaticMultiCompareKeyElement(skiplist,key,&(prevNode->_element), 1); + // ......................................................................... + // We require the successor of the current node so we can perform a + // comparison. It should never be null. + // ......................................................................... - - // ....................................................................... - // We have found the item! Not possible since we are searching by key! - // ....................................................................... - if (compareResult == 0) { - assert(false); + prevNode = (TRI_skiplistEx_node_t*)(prevNode->_column[currentLevel]._prev); + if (prevNode == NULL) { + LOG_ERROR("CAS Failure"); + assert(0); + goto CAS_RESTART; } - // ....................................................................... - // The element is greater than the next node element. Keep going on this - // level. - // ....................................................................... - if (compareResult < 0) { - currentNode = prevNode; + + // ......................................................................... + // Is our next node a glass node? If so we must skip it! + // Note: since Garbage Collection is performed in TWO passes, it is possible + // that we have more than one glass node. + // ......................................................................... + + if (prevNode->_towerFlag == TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG) { + goto START; + } + + + // ......................................................................... + // WE HAVE FOUR CASES TO CONSIDER + // ......................................................................... + + // ......................................................................... + // CASE ONE: + // At this level we have the smallest (start) and largest (end) nodes ONLY. 
+ // CASE TWO: + // We have arrived at the end of the nodes and we are not at the + // start of the nodes either. + // ......................................................................... + + if (prevNode == &(skiplist->_base._startNode)) { + + + // ....................................................................... + // We are at the lowest level of the lists, and we haven't found the item + // yet. The currentNode does NOT compare and the next node is +\infinty. + // ....................................................................... + + if (currentLevel == 0) { + return currentNode; + } + + + // ....................................................................... + // We have not yet reached the lowest level continue down. Possibly our + // item we seek is to be found a lower level. + // ....................................................................... + + prevNode = currentNode; + --currentLevel; goto START; } - - // ....................................................................... - // We have reached the lowest level of the lists -- no such item. - // ....................................................................... - if (currentLevel == 0) { - return currentNode; - } + - // ....................................................................... - // Drop down the list - // ....................................................................... - --currentLevel; + // ......................................................................... + // CASE THREE: + // We are the smallest left most node and the NEXT node is NOT the end node. + // Compare this element with the element in the right node to see what we do. + // CASE FOUR: + // We are somewhere in the middle of a list, away from the smallest and + // largest nodes. + // ......................................................................... 
- goto START; - } - + else { // nextNode != &(skiplist->_endNode + int compareResult; - -// END: + // ....................................................................... + // Use the callback to determine if the element is less or greater than + // the next node element. We treat the comparison by assuming we are + // looking for a "key - epsilon". With this assumption we always find the + // last key to our right if it exists. The reason this is necessary is as + // follows: we allow a multiple documents with the same key to be stored + // here with the proviso that all but the last one is marked as deleted. + // This is how we cater for multiple revisions. + // ....................................................................... + + compareResult = IndexStaticMultiCompareKeyElement(skiplist, key, &(prevNode->_element), 1); + + // ....................................................................... + // If the number of fields (attributes) in the key is LESS than the number + // of fields in the element to be compared to, then EVEN if the keys which + // which are common to both equate as EQUAL, we STILL return 1 rather than + // 0! This ensures that the right interval end point is correctly positioned + // -- slightly inefficient since the lowest level skip list 0 has to be reached + // in this case. + // ....................................................................... - assert(false); // there is no way we can be here + // ....................................................................... + // We have found the item! + // ....................................................................... + + if (compareResult == 0) { + assert(false); + } + + if (compareResult < 0) { + currentNode = prevNode; + goto START; + } + + + // ....................................................................... + // The element is less than the next node. Can we drop down the list? + // If have reached the lowest level of the lists -- no such item. 
+ // ....................................................................... + + if (currentLevel == 0) { + return currentNode; + } + + + // ....................................................................... + // Drop down the list + // ....................................................................... + + --currentLevel; + prevNode = currentNode; + goto START; + } + + } // end of label START + + } // end of label CAS_RESTART + + assert(false); return NULL; } @@ -2528,7 +3257,10 @@ void* TRI_RightLookupByKeySkipListExMulti(TRI_skiplistEx_multi_t* skiplist, void //////////////////////////////////////////////////////////////////////////////// void* TRI_StartNodeSkipListExMulti(TRI_skiplistEx_multi_t* skiplist) { - return &(skiplist->_base._startNode); + if (skiplist != NULL) { + return &(skiplist->_base._startNode); + } + return NULL; } @@ -2556,11 +3288,19 @@ void* TRI_StartNodeSkipListExMulti(TRI_skiplistEx_multi_t* skiplist) { static void DestroyBaseSkipListEx(TRI_skiplistEx_base_t* baseSkiplist) { // ........................................................................... - // No locking, blocking or CAS here. Some asked for the index to destroyed. + // No locking, blocking or CAS here. Someone asked for the index to destroyed. // We assume that no further read/write operations are being accepted which - // require this index. + // require this index. + // TODO: + // Warning: there is a memory leak which requires fixing here. The Garbage + // collection may be working in the background and if we destroy the + // skiplist before the Garbage collection thread has been terminated - then + // bang! + // The idea is to send the Garbage collector a signal so that ALL references + // to this index are expunged, then the same process will call this function. // ........................................................................... 
+ TRI_skiplistEx_node_t* nextNode; TRI_skiplistEx_node_t* nextNextNode; @@ -2593,7 +3333,9 @@ static void DestroySkipListExNode (TRI_skiplistEx_base_t* skiplist, TRI_skiplist return; } TRI_Free(TRI_UNKNOWN_MEM_ZONE, (void*)(node->_column)); - IndexStaticDestroyElement(skiplist, &(node->_element)); + // recall that the memory assigned for the node->_element is actually part of the node + // so we do not free that memory here - it is freed when we free the whole node + IndexStaticDestroyElement(skiplist, &(node->_element)); } @@ -2626,6 +3368,14 @@ static int GrowNewNodeHeight(TRI_skiplistEx_node_t* node, uint32_t height, uint3 return result; } + + // ............................................................................ + // In general the height is related to the colLength via the relation + // height = colLength. However, we allow for the fact that node may have a + // height much bigger than the current column length. This of course saves us + // from continually allocating and deallocating memory. + // ............................................................................ + if (colLength > height) { assert(0); return TRI_ERROR_INTERNAL; @@ -2639,12 +3389,20 @@ static int GrowNewNodeHeight(TRI_skiplistEx_node_t* node, uint32_t height, uint3 return TRI_ERROR_OUT_OF_MEMORY; } + + // .......................................................................... + // Ensure that the towers are normal, at least initially for a new node + // .......................................................................... + + node->_towerFlag = TRI_SKIPLIST_EX_NORMAL_TOWER_NODE_FLAG; + + // ........................................................................... // Initialise the storage // ........................................................................... 
{ uint32_t j; - for (j = node->_colLength; j < height; ++j) { + for (j = 0; j < height; ++j) { node->_column[j]._prev = NULL; node->_column[j]._next = NULL; node->_column[j]._nbFlag = TRI_SKIPLIST_EX_NORMAL_NEAREST_NEIGHBOUR_FLAG; @@ -2656,16 +3414,12 @@ static int GrowNewNodeHeight(TRI_skiplistEx_node_t* node, uint32_t height, uint3 //////////////////////////////////////////////////////////////////////////////// -/// @brief joins a left node and right node together +/// @brief joins a the start node to the end node and visa versa //////////////////////////////////////////////////////////////////////////////// - - static void JoinStartEndNodes(TRI_skiplistEx_node_t* leftNode, TRI_skiplistEx_node_t* rightNode, uint32_t startLevel, uint32_t endLevel) { - uint32_t j; - if (startLevel > endLevel) { // something wrong assert(false); return; @@ -2673,21 +3427,14 @@ static void JoinStartEndNodes(TRI_skiplistEx_node_t* leftNode, // change level to height endLevel += 1; - - if (leftNode->_colLength < endLevel) { - assert(false); - return; - } - if (rightNode->_colLength < endLevel) { - assert(false); - return; - } - - for (j = startLevel; j < endLevel; ++j) { - (leftNode->_column)[j]._next = rightNode; - (rightNode->_column)[j]._prev = leftNode; - } + { + uint32_t j; + for (j = startLevel; j < endLevel; ++j) { + (leftNode->_column)[j]._next = rightNode; + (rightNode->_column)[j]._prev = leftNode; + } + } } @@ -2696,31 +3443,36 @@ static void JoinStartEndNodes(TRI_skiplistEx_node_t* leftNode, //////////////////////////////////////////////////////////////////////////////// static void* NextNodeBaseSkipListEx(TRI_skiplistEx_base_t* skiplist, void* currentNode, uint64_t thisTransID) { - TRI_skiplistEx_node_t* cn = (TRI_skiplistEx_node_t*)(currentNode); + TRI_skiplistEx_node_t* volatile nn = (TRI_skiplistEx_node_t* volatile)(currentNode); - if (cn == NULL) { - return &(skiplist->_startNode); + if (nn == NULL) { + nn = &(skiplist->_startNode); } - if (cn == &(skiplist->_endNode)) 
{ - return NULL; - } // ........................................................................... // We are required to skip certain nodes based upon the transaction id // ........................................................................... - while (true) { - volatile TRI_skiplistEx_node_t* nn = cn->_column[0]._next; + while (nn != &(skiplist->_endNode)) { + nn = nn->_column[0]._next; + if (nn == NULL) { // this should not happen! LOG_ERROR("CAS Failure"); assert(0); return NULL; } - if ((nn->_insTransID < thisTransID) && (nn->_delTransID > thisTransID)) { - return (void*)(nn); - } + + if (nn->_insTransID > thisTransID) { // item was inserted AFTER this transaction started - skip it + continue; + } + + if (nn->_delTransID <= thisTransID) { // item has been previously deleted - skip it + continue; + } + + return (void*)(nn); }; return(NULL); @@ -2733,31 +3485,37 @@ static void* NextNodeBaseSkipListEx(TRI_skiplistEx_base_t* skiplist, void* curre //////////////////////////////////////////////////////////////////////////////// static void* PrevNodeBaseSkipListEx(TRI_skiplistEx_base_t* skiplist, void* currentNode, uint64_t thisTransID) { - TRI_skiplistEx_node_t* cn = (TRI_skiplistEx_node_t*)(currentNode); + TRI_skiplistEx_node_t* volatile pn = (TRI_skiplistEx_node_t*)(currentNode); - if (cn == NULL) { + if (pn == NULL) { return &(skiplist->_endNode); } - if (cn == &(skiplist->_startNode)) { - return NULL; - } // ........................................................................... // We are required to skip certain nodes based upon the transaction id // ........................................................................... - while (true) { - volatile TRI_skiplistEx_node_t* nn = cn->_column[0]._prev; - if (nn == NULL) { + while (pn != &(skiplist->_startNode)) { + pn = pn->_column[0]._prev; + + if (pn == NULL) { // this should not happen! 
LOG_ERROR("CAS Failure"); assert(0); return NULL; } - if ((nn->_insTransID < thisTransID) && (nn->_delTransID > thisTransID)) { - return (void*)(nn); - } + + if (pn->_insTransID > thisTransID) { // item was inserted AFTER this transaction started - skip it + continue; + } + + if (pn->_delTransID <= thisTransID) { // item has been previously deleted - skip it + continue; + } + + return (void*)(pn); + }; return(NULL); @@ -2784,7 +3542,7 @@ static int32_t RandLevel (TRI_skiplistEx_base_t* skiplist) { *ptr = TRI_UInt32Random(); ++ptr; } - ptr = skiplist->_random; + ptr = skiplist->_random; // go back to the begining // ........................................................................... @@ -2875,7 +3633,8 @@ static int GrowStartEndNodes(TRI_skiplistEx_base_t* skiplist, uint32_t newHeight // ................................................................................ while (true) { - if (TRI_CompareAndSwapIntegerUInt32(&(skiplist->_growStartEndNodesFlag), TRI_SKIPLIST_EX_FREE_TO_GROW_START_END_NODES_FLAG, + if (TRI_CompareAndSwapIntegerUInt32(&(skiplist->_growStartEndNodesFlag), + TRI_SKIPLIST_EX_FREE_TO_GROW_START_END_NODES_FLAG, TRI_SKIPLIST_EX_NOT_FREE_TO_GROW_START_END_NODES_FLAG) ) { break; } @@ -2884,11 +3643,11 @@ static int GrowStartEndNodes(TRI_skiplistEx_base_t* skiplist, uint32_t newHeight LOG_ERROR("CAS failed for GrowStartEndNodes"); return TRI_WARNING_ARANGO_INDEX_SKIPLIST_INSERT_CAS_FAILURE; } - usleep(1000); + usleep(CAS_FAILURE_SLEEP_TIME); } oldStartHeight = skiplist->_startNode._colLength; - oldEndHeight = skiplist->_startNode._colLength; + oldEndHeight = skiplist->_endNode._colLength; if (oldStartHeight != oldEndHeight) { result = TRI_ERROR_INTERNAL; @@ -2897,11 +3656,9 @@ static int GrowStartEndNodes(TRI_skiplistEx_base_t* skiplist, uint32_t newHeight if (result == TRI_ERROR_NO_ERROR) { if (oldStartHeight < newHeight) { // ............................................................................ 
- // need a CAS statement here since we may have multiple readers busy ready + // need a CAS statement here since we may have multiple readers busy reading // the height of the towers. // ............................................................................ - skiplist->_startNode._colLength = newHeight; - skiplist->_endNode._colLength = newHeight; if (!TRI_CompareAndSwapIntegerUInt32(&(skiplist->_startNode._colLength), oldStartHeight, newHeight) ) { // should never happen @@ -2913,7 +3670,7 @@ static int GrowStartEndNodes(TRI_skiplistEx_base_t* skiplist, uint32_t newHeight // should never happen result = TRI_WARNING_ARANGO_INDEX_SKIPLIST_INSERT_CAS_FAILURE; } - if (result != TRI_ERROR_NO_ERROR) { + if (result != TRI_ERROR_NO_ERROR) { // undo all of good work TRI_CompareAndSwapIntegerUInt32(&(skiplist->_startNode._colLength), newHeight, oldStartHeight); } } @@ -2925,10 +3682,10 @@ static int GrowStartEndNodes(TRI_skiplistEx_base_t* skiplist, uint32_t newHeight TRI_SKIPLIST_EX_NOT_FREE_TO_GROW_START_END_NODES_FLAG, TRI_SKIPLIST_EX_FREE_TO_GROW_START_END_NODES_FLAG) ) { // .............................................................................. - // not possible - eventually send signal to database to rebuild index + // failure is not a word we recognise - eventually send signal to database to rebuild index // .............................................................................. 
LOG_ERROR("CAS failed for GrowStartEndNodes"); - assert(0); + assert(0); // remove after debugging if (result == TRI_ERROR_NO_ERROR) { return TRI_WARNING_ARANGO_INDEX_SKIPLIST_INSERT_CAS_FAILURE; } @@ -3108,7 +3865,12 @@ static int JoinNewNodeCas (TRI_skiplistEx_node_t* newNode) { if ( (leftNode->_towerFlag != TRI_SKIPLIST_EX_NORMAL_TOWER_NODE_FLAG) || (rightNode->_towerFlag != TRI_SKIPLIST_EX_NORMAL_TOWER_NODE_FLAG) ) { result = UndoBricking (newNode, brickCounter); - return result; + if (result != TRI_ERROR_NO_ERROR) { + LOG_ERROR("failed unbricking"); + abort(); + return result; + } + return TRI_WARNING_ARANGO_INDEX_SKIPLIST_INSERT_CAS_FAILURE; } } @@ -3129,7 +3891,7 @@ static int JoinNewNodeCas (TRI_skiplistEx_node_t* newNode) { ////////////////////////////////////////////////////////////////////////////////// -// removal +// removal static functions below ////////////////////////////////////////////////////////////////////////////////// static int SelfUndoBricking(TRI_skiplistEx_node_t* node, const int counter) { @@ -3260,6 +4022,7 @@ static int UnJoinOldNodeCas (TRI_skiplistEx_node_t* oldNode) { int brickCounter = 0; int pointerCounter = 0; int result = TRI_ERROR_NO_ERROR; + bool ok; // Pass 1: brick the nearest neighbours on the node itself. 
result = SelfBricking(oldNode, &selfBrickCounter); @@ -3269,16 +4032,30 @@ static int UnJoinOldNodeCas (TRI_skiplistEx_node_t* oldNode) { // Pass 2: make the node glass - if (!TRI_CompareAndSwapIntegerUInt32 (&(oldNode->_towerFlag), + ok = TRI_CompareAndSwapIntegerUInt32 (&(oldNode->_towerFlag), TRI_SKIPLIST_EX_NORMAL_TOWER_NODE_FLAG, - TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG) ) { - SelfUndoBricking(oldNode,selfBrickCounter); + TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG); + + if (!ok) { + result = SelfUndoBricking(oldNode,selfBrickCounter); + if (result != TRI_ERROR_NO_ERROR) { + LOG_ERROR("UnJoinOldNodeCas failed "); + abort(); + return TRI_ERROR_INTERNAL; + } return TRI_WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_CAS_FAILURE; } // Pass 3: unbrick each nearest neigbour node here result = SelfUndoBricking(oldNode,selfBrickCounter); if (result != TRI_ERROR_NO_ERROR) { + // undo the glassing of the node + ok = TRI_CompareAndSwapIntegerUInt32(&(oldNode->_towerFlag), TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG, TRI_SKIPLIST_EX_NORMAL_TOWER_NODE_FLAG); + if (!ok) { + LOG_ERROR("UnJoinOldNodeCas failed"); + abort(); + return TRI_ERROR_INTERNAL; + } return result; } @@ -3286,9 +4063,15 @@ static int UnJoinOldNodeCas (TRI_skiplistEx_node_t* oldNode) { // Pass 4: brick each of it's nearest neighbours result = DoBricking(oldNode, &brickCounter); if (result != TRI_ERROR_NO_ERROR) { - TRI_CompareAndSwapIntegerUInt32 (&(oldNode->_towerFlag), - TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG, - TRI_SKIPLIST_EX_NORMAL_TOWER_NODE_FLAG); + // undo the glassing of the node + ok = TRI_CompareAndSwapIntegerUInt32 (&(oldNode->_towerFlag), + TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG, + TRI_SKIPLIST_EX_NORMAL_TOWER_NODE_FLAG); + if (!ok) { + LOG_ERROR("UnJoinOldNodeCas failed"); + abort(); + return TRI_ERROR_INTERNAL; + } if (result != TRI_ERROR_INTERNAL) { return TRI_WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_CAS_FAILURE; } @@ -3298,10 +4081,23 @@ static int UnJoinOldNodeCas (TRI_skiplistEx_node_t* oldNode) { // Pass 5: 
unjoin the old node from the list by assigning pointers result = DoUnjoinPointers(oldNode, &pointerCounter); if (result != TRI_ERROR_NO_ERROR) { - UndoBricking(oldNode,brickCounter); - TRI_CompareAndSwapIntegerUInt32 (&(oldNode->_towerFlag), - TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG, - TRI_SKIPLIST_EX_NORMAL_TOWER_NODE_FLAG); + int tempResult; + tempResult = UndoBricking(oldNode,brickCounter); + if (tempResult != TRI_ERROR_NO_ERROR) { + LOG_ERROR("UnJoinOldNodeCas failed"); + abort(); + return TRI_ERROR_INTERNAL; + } + + ok = TRI_CompareAndSwapIntegerUInt32 (&(oldNode->_towerFlag), + TRI_SKIPLIST_EX_GLASS_TOWER_NODE_FLAG, + TRI_SKIPLIST_EX_NORMAL_TOWER_NODE_FLAG); + if (!ok) { + LOG_ERROR("UnJoinOldNodeCas failed"); + abort(); + return TRI_ERROR_INTERNAL; + } + if (result != TRI_ERROR_INTERNAL) { return TRI_WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_CAS_FAILURE; } diff --git a/arangod/SkipListsEx/skiplistEx.h b/arangod/SkipListsEx/skiplistEx.h index d7468acb13..25b42d784d 100644 --- a/arangod/SkipListsEx/skiplistEx.h +++ b/arangod/SkipListsEx/skiplistEx.h @@ -187,21 +187,6 @@ typedef struct TRI_skiplistEx_s { TRI_skiplistEx_t; -//////////////////////////////////////////////////////////////////////////////// -/// @brief structure used for a skip list which only accepts unique entries and is thread safe -//////////////////////////////////////////////////////////////////////////////// - -//////////////////////////////////////////////////////////////////////////////// -// structure for a skiplist which allows unique entries -- with locking -// available for its nearest neighbours. 
-// TODO: implement locking for nearest neighbours rather than for all of index -//////////////////////////////////////////////////////////////////////////////// - -typedef struct TRI_skiplistEx_synced_s { - TRI_skiplistEx_t _base; - TRI_read_write_lock_t _lock; -} TRI_skiplistEx_synced_t; - //////////////////////////////////////////////////////////////////////////////// /// @} @@ -321,7 +306,8 @@ void* TRI_PrevNodeSkipListEx (TRI_skiplistEx_t*, void*, uint64_t thisTransID); //////////////////////////////////////////////////////////////////////////////// int TRI_RemoveElementSkipListEx (TRI_skiplistEx_t*, void*, void*, - const int passLevel, const uint64_t thisTransID); + const int passLevel, const uint64_t thisTransID, + TRI_skiplistEx_node_t**); @@ -330,7 +316,8 @@ int TRI_RemoveElementSkipListEx (TRI_skiplistEx_t*, void*, void*, //////////////////////////////////////////////////////////////////////////////// int TRI_RemoveKeySkipListEx (TRI_skiplistEx_t*, void*, void*, - const int passLevel, const uint64_t thisTransID); + const int passLevel, const uint64_t thisTransID, + TRI_skiplistEx_node_t**); @@ -359,7 +346,7 @@ void* TRI_StartNodeSkipListEx (TRI_skiplistEx_t*); //////////////////////////////////////////////////////////////////////////////// -/// @brief structure used for a multi skiplist +/// @brief structure used for skiplist accepting duplicate entries //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// @@ -387,17 +374,6 @@ typedef struct TRI_skiplistEx_multi_s { -//////////////////////////////////////////////////////////////////////////////// -/// @brief structure used for a multi skip list and is thread safe -//////////////////////////////////////////////////////////////////////////////// - -typedef struct TRI_skiplistEx_synced_multi_s { - TRI_skiplistEx_t _base; - TRI_read_write_lock_t _lock; -} TRI_skiplistEx_synced_multi_t; - - - 
//////////////////////////////////////////////////////////////////////////////// /// @} //////////////////////////////////////////////////////////////////////////////// @@ -526,7 +502,8 @@ void* TRI_PrevNodeSkipListExMulti (TRI_skiplistEx_multi_t*, void*, uint64_t this //////////////////////////////////////////////////////////////////////////////// int TRI_RemoveElementSkipListExMulti (TRI_skiplistEx_multi_t*, void*, void*, - const int passLevel, const uint64_t thisTransID); + const int passLevel, const uint64_t thisTransID, + TRI_skiplistEx_node_t**); @@ -535,7 +512,8 @@ int TRI_RemoveElementSkipListExMulti (TRI_skiplistEx_multi_t*, void*, void*, //////////////////////////////////////////////////////////////////////////////// int TRI_RemoveKeySkipListExMulti (TRI_skiplistEx_multi_t*, void*, void*, - const int passLevel, const uint64_t thisTransID); + const int passLevel, const uint64_t thisTransID, + TRI_skiplistEx_node_t**); diff --git a/arangod/SkipListsEx/skiplistExIndex.c b/arangod/SkipListsEx/skiplistExIndex.c index 07c622501b..c5a9ce565d 100644 --- a/arangod/SkipListsEx/skiplistExIndex.c +++ b/arangod/SkipListsEx/skiplistExIndex.c @@ -36,6 +36,7 @@ //////////////////////////////////////////////////////////////////////////////// #include "skiplistExIndex.h" +#include "VocBase/index-garbage-collector.h" //------------------------------------------------------------------------------ @@ -977,14 +978,98 @@ int SkiplistExIndex_insert(SkiplistExIndex* skiplistExIndex, SkiplistExIndexElem /// @brief removes an entry from the skip list ////////////////////////////////////////////////////////////////////////////////// +static int CollectSkiplistExGarbage(TRI_index_gc_t* indexGCData) { + int result = TRI_ERROR_NO_ERROR; + SkiplistExIndex* skiplistIndex; + TRI_skiplistEx_node_t* passNode; + TRI_skiplistEx_t* skiplist; + + if (indexGCData == NULL) { + abort(); // remove after debugging + return TRI_ERROR_INTERNAL; + } + + skiplistIndex = 
(SkiplistExIndex*)(indexGCData->_index); + if (skiplistIndex == NULL) { + abort(); // remove after debugging + return TRI_ERROR_INTERNAL; + } + + skiplist = (skiplistIndex->_skiplistEx).uniqueSkiplistEx; + + passNode = (TRI_skiplistEx_node_t*)(indexGCData->_data); + if (passNode == NULL) { + abort(); // remove after debugging + return TRI_ERROR_INTERNAL; + } + + + switch (indexGCData->_lastPass) { + case 1: { // the first call from the garbage collector + result = TRI_RemoveElementSkipListEx (skiplist, NULL, NULL, 2, passNode->_delTransID, &passNode); + break; + } + + case 2: { + result = TRI_RemoveElementSkipListEx (skiplist, NULL, NULL, 3, passNode->_delTransID, &passNode); + break; + } + + case 254: { // just before the node is excised from the skiplist + result = TRI_ERROR_NO_ERROR; + break; + } + + case 255: { // just AFTER the node is excised from the skiplist + result = TRI_ERROR_NO_ERROR; + break; + } + + default : { + abort(); + } + + } // end of switch statement + + + return result; +} + + int SkiplistExIndex_remove(SkiplistExIndex* skiplistExIndex, SkiplistExIndexElement* element, uint64_t thisTransID) { int result; + TRI_skiplistEx_node_t* passNode; + TRI_index_gc_t indexGCData; + // ............................................................................ // This has been called from the database so it has a pass level of 1 // ............................................................................ - result = TRI_RemoveElementSkipListEx(skiplistExIndex->_skiplistEx.uniqueSkiplistEx, element, NULL, 1, thisTransID); + result = TRI_RemoveElementSkipListEx(skiplistExIndex->_skiplistEx.uniqueSkiplistEx, element, + NULL, 1, thisTransID, &passNode); + if (result == TRI_ERROR_NO_ERROR) { + + // .......................................................................... + // add to garbage collection + // .......................................................................... 
+ + indexGCData._index = (void*)(skiplistExIndex); + indexGCData._passes = 2; + indexGCData._lastPass = 0; // will be assigned correctly by the GC + indexGCData._transID = 0; // will be assigned correctly by the GC + indexGCData._data = passNode; // the address of the node in the linked list which will eventually be excised + indexGCData._collectGarbage = CollectSkiplistExGarbage; + + // ........................................................................... + // Adds an item to the rubbish collection linked list. + // This can fail if the GC is busy and the CAS statements in the GC fail. + // It is up to the calling procedure to determine what to do with this failure, + // generally a retry will suffice. + // ........................................................................... + result = TRI_AddToIndexGC(&indexGCData); + } + return result; } @@ -1358,14 +1443,37 @@ int MultiSkiplistExIndex_insert(SkiplistExIndex* skiplistExIndex, SkiplistExInde /// @brief removes an entry from the skiplist ////////////////////////////////////////////////////////////////////////////////// +static int CollectSkiplistExMultiGarbage(TRI_index_gc_t* indexGCData) { + int result = TRI_ERROR_NO_ERROR; + return result; +} + + int MultiSkiplistExIndex_remove(SkiplistExIndex* skiplistExIndex, SkiplistExIndexElement* element, uint64_t thisTransID) { int result; + TRI_skiplistEx_node_t* passNode; + TRI_index_gc_t indexGCData; // ............................................................................ // This has been called from the database so it has a pass level of 1 // ............................................................................ 
- result = TRI_RemoveElementSkipListExMulti(skiplistExIndex->_skiplistEx.nonUniqueSkiplistEx, element, NULL, 1, thisTransID); + result = TRI_RemoveElementSkipListExMulti(skiplistExIndex->_skiplistEx.nonUniqueSkiplistEx, + element, NULL, 1, thisTransID, &passNode); + if (result == TRI_ERROR_NO_ERROR) { + + // .......................................................................... + // add to garbage collection + // .......................................................................... + + indexGCData._index = (void*)(skiplistExIndex); + indexGCData._passes = 2; + indexGCData._lastPass = 0; // will be assigned correctly by the GC + indexGCData._transID = 0; // will be assigned correctly by the GC + indexGCData._data = passNode; // the address of the node in the linked list which will eventually be excised + indexGCData._collectGarbage = CollectSkiplistExMultiGarbage; + result = TRI_AddToIndexGC(&indexGCData); // adds an item to the rubbish collection linked list + } return result; } diff --git a/arangod/V8Server/v8-vocbase.cpp b/arangod/V8Server/v8-vocbase.cpp index 1a5c0da441..d992e2dcbe 100644 --- a/arangod/V8Server/v8-vocbase.cpp +++ b/arangod/V8Server/v8-vocbase.cpp @@ -1443,7 +1443,11 @@ static v8::Handle CreateVocBase (v8::Arguments const& argv, TRI_col_t TRI_V8_EXCEPTION_INTERNAL(scope, "cannot extract vocbase"); } - // expecting at least one arguments + + // ........................................................................... + // We require exactly 1 or exactly 2 arguments -- anything else is an error + // ........................................................................... 
+ if (argv.Length() < 1 || argv.Length() > 2) { TRI_V8_EXCEPTION_USAGE(scope, "_create(, )"); } diff --git a/arangod/VocBase/index-garbage-collector.c b/arangod/VocBase/index-garbage-collector.c index 247cc67ba7..97235c8870 100644 --- a/arangod/VocBase/index-garbage-collector.c +++ b/arangod/VocBase/index-garbage-collector.c @@ -39,7 +39,6 @@ #include "VocBase/transaction.h" -// @@@@@@@@ TODO: TRI_addToIOndexGC & ExciseNode // ----------------------------------------------------------------------------- // --SECTION-- private constants @@ -50,7 +49,13 @@ /// @{ //////////////////////////////////////////////////////////////////////////////// -#define MAX_INDEX_GC_CAS_RETRIES 100 + +//////////////////////////////////////////////////////////////////////////////// +/// @brief the number of times the Garbage Collector will retry when a CAS statement fails +//////////////////////////////////////////////////////////////////////////////// + +static int const MAX_INDEX_GC_CAS_RETRIES = 100; + //////////////////////////////////////////////////////////////////////////////// /// @brief the period between garbage collection tries in microseconds @@ -58,6 +63,13 @@ static int const INDEX_GC_INTERVAL = (1 * 1000 * 1000); + +//////////////////////////////////////////////////////////////////////////////// +/// @brief the amount of time to sleep when a CAS statement fails (in microseconds) +//////////////////////////////////////////////////////////////////////////////// + +static unsigned int CAS_FAILURE_SLEEP_TIME = 1000; + // ............................................................................. // The rubbish collection operates as a simple linked list. 
Whenever an index // requests an item to be added to the collector, we insert a node at the end @@ -80,13 +92,17 @@ typedef struct linked_list_s { linked_list_node_t _startNode; linked_list_node_t _endNode; volatile uint32_t _listFlag; + uint64_t _size; } linked_list_t; + enum { INDEX_GC_LIST_NORMAL_FLAG, INDEX_GC_LIST_FORBIDDEN_FLAG, INDEX_GC_NODE_NORMAL_FLAG, + INDEX_GC_NODE_BRICKED_FLAG, INDEX_GC_NODE_DELETED_FLAG, + INDEX_GC_NODE_INSERTED_FLAG }; @@ -95,6 +111,8 @@ enum { // ............................................................................. static linked_list_t* INDEX_GC_LINKED_LIST = NULL; +static void* INDEX_GC_DATA = NULL; + //////////////////////////////////////////////////////////////////////////////// /// @} @@ -111,10 +129,23 @@ static linked_list_t* INDEX_GC_LINKED_LIST = NULL; //////////////////////////////////////////////////////////////////////////////// static int ExciseNode (linked_list_node_t*); +static int ExciseNodeBrick (linked_list_node_t* nodeToExcise, linked_list_node_t* prevNode, linked_list_node_t* nextNode); +static int ExciseNodeBrickUndo (linked_list_node_t* nodeToExcise, linked_list_node_t* prevNode, linked_list_node_t* nextNode, int bricked); +static int ExciseNodeSwapPointers (linked_list_node_t* nodeToExcise, linked_list_node_t* prevNode, linked_list_node_t* nextNode); +static int ExciseNodeSwapPointersUndo(linked_list_node_t* nodeToExcise, linked_list_node_t* prevNode, linked_list_node_t* nextNode, int swaped); + static void InitialiseStaticLinkedList (void); static void InnerThreadLoop (bool*); + +static int InsertNode (linked_list_node_t*); +static int InsertNodeBrick (linked_list_node_t* prevNode, linked_list_node_t* nextNode); +static int InsertNodeBrickUndo (linked_list_node_t* prevNode, linked_list_node_t* nextNode, int bricked); +static int InsertNodeSwapPointers (linked_list_node_t* nodeToInsert, linked_list_node_t* prevNode, linked_list_node_t* nextNode); +static int InsertNodeSwapPointersUndo 
(linked_list_node_t* nodeToInsert, linked_list_node_t* prevNode, linked_list_node_t* nextNode, int swaped); + static void RemoveLinkedList (void); static void SetForbiddenFlag (void); +static void UnsetForbiddenFlag (void); //////////////////////////////////////////////////////////////////////////////// /// @} @@ -156,7 +187,8 @@ void TRI_IndexGCVocBase (void* data) { // Initialise the static linked list: INDEX_GC_LINKED_LIST // .......................................................................... - // InitialiseStaticLinkedList(); + InitialiseStaticLinkedList(); + INDEX_GC_DATA = data; // .......................................................................... @@ -175,7 +207,7 @@ void TRI_IndexGCVocBase (void* data) { int oldState = vocbase->_state; - /* + // ........................................................................ // The loop goes to sleep whenever we are at the end of the linked list. // ........................................................................ @@ -185,9 +217,10 @@ void TRI_IndexGCVocBase (void* data) { &(INDEX_GC_LINKED_LIST->_endNode)); InnerThreadLoop (&goToSleep); - */ - goToSleep = true; + + // goToSleep = true; + //printf("oreste:%s:%d:gotosleep=%d:state=%d\n",__FILE__,__LINE__,goToSleep,vocbase->_state); if (vocbase->_state == 1 && goToSleep) { // only sleep while server is still running @@ -209,13 +242,22 @@ void TRI_IndexGCVocBase (void* data) { // can be made. // .......................................................................... - //SetForbiddenFlag(); + SetForbiddenFlag(); + + + // .......................................................................... + // We need to wait a little while in case there are any other threads which + // are busy adding things to the collector + // .......................................................................... + + usleep(INDEX_GC_INTERVAL); + // .......................................................................... 
// Remove all memory we assigned to any structures // .......................................................................... - //RemoveLinkedList(); + RemoveLinkedList(); LOG_TRACE("the index garbage collector event loop has stopped"); } @@ -234,10 +276,38 @@ void TRI_IndexGCVocBase (void* data) { /// @{ //////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////// +// Adds a node to the linked list, so that eventually the GC will remove an +// item from the given index. +//////////////////////////////////////////////////////////////////////////////// + int TRI_AddToIndexGC(TRI_index_gc_t* indexData) { int result = TRI_ERROR_NO_ERROR; - bool ok; + linked_list_node_t* insertNode; // node to be inserted into our linked list + TRI_vocbase_t* vocbase = (TRI_vocbase_t*)(INDEX_GC_DATA); + + // ........................................................................... + // Check if the gc has actually started + // ........................................................................... + + if (vocbase == NULL) { + return TRI_ERROR_INTERNAL; + } + + // ........................................................................... + // Check if the server has shut down? + // ........................................................................... + + if (vocbase->_state == -1) { + return TRI_WARNING_ARANGO_INDEX_GARBAGE_COLLECTOR_SHUTDOWN; + } + + // ........................................................................... + // Check that we have something to add + // ........................................................................... + // ........................................................................... // Check that we have something to add @@ -248,32 +318,137 @@ int TRI_AddToIndexGC(TRI_index_gc_t* indexData) { } - // ........................................................................... 
- // Check that the rubbish collector is accepting rubbish. - // Generally this means that the server has been shut down. In this case we - // will not accept anymore rubbish. - // ........................................................................... + insertNode = (linked_list_node_t*)(TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(linked_list_node_t), true)); + + if (insertNode == NULL) { + return TRI_ERROR_OUT_OF_MEMORY; + } + + insertNode->_indexData = (TRI_index_gc_t*)(TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_index_gc_t), true)); + if (insertNode->_indexData == NULL) { + TRI_Free(TRI_UNKNOWN_MEM_ZONE, insertNode); + } - // ........................................................................... - // The indexData structure whose memory has been allocated by the INDEX - // (and not this function) will also be removed by the INDEX which called - // this function. When indexData._lastPass = 254, then the collectGarbage - // callback will be alerted to the fact that the excision of the item from the - // rubbish collector will be imminent. When indexData._lastPass = 255, then the - // collectGarbage callback will be alerted that the excision has occured and - // that any memory allocated must be deallocated. - // ........................................................................... 
+ insertNode->_indexData->_index = indexData->_index; + insertNode->_indexData->_passes = indexData->_passes; + insertNode->_indexData->_lastPass = 0; + insertNode->_indexData->_data = indexData->_data; + insertNode->_indexData->_collectGarbage = indexData->_collectGarbage; + // TODO: get the current transaction id + //insertNode->_indexData->_transID = vocbase->_transactionStuff->_GetGlobalTransactionFigures(0); + ++(insertNode->_indexData->_transID); - /* - INDEX_GC_LIST_NORMAL_FLAG - ok = TRI_CompareAndSwapIntegerUInt32 (&(leftNN->_nbFlag), INDEX_GC_LIST_NORMAL_FLAG, INDEX_GC_LIST_NORMAL_FLAG); -*/ - - return result; + // ........................................................................... + // the assignment of the _next and _prev pointers must be done in a CAS loop + // within the InsertNode(...) function. + // ........................................................................... + + insertNode->_next = NULL; + insertNode->_prev = NULL; + insertNode->_nodeFlag = INDEX_GC_NODE_NORMAL_FLAG; + + result = InsertNode(insertNode); + + if (result != TRI_ERROR_NO_ERROR) { + TRI_Free(TRI_UNKNOWN_MEM_ZONE, insertNode->_indexData); + TRI_Free(TRI_UNKNOWN_MEM_ZONE, insertNode); + } + + return result; } + +//////////////////////////////////////////////////////////////////////////////// +// For the given index, all nodes which match the index will be excised from +// the linked list.
+//////////////////////////////////////////////////////////////////////////////// + +int TRI_ExpungeIndexGC (TRI_index_gc_t* indexData) { + int result = TRI_ERROR_NO_ERROR; + linked_list_node_t* currentNode; + bool finished = true; + int casCounter = 0; + + LOG_TRACE("the index garbage collector has commenced expunging all nodes for a given index"); + + CAS_LOOP: { + + result = TRI_ERROR_NO_ERROR; + currentNode = &(INDEX_GC_LINKED_LIST->_startNode); + + if (casCounter > MAX_INDEX_GC_CAS_RETRIES) { + LOG_ERROR("max cas loop exceeded"); + return TRI_ERROR_INTERNAL; + } + + ++casCounter; + + while (currentNode != NULL) { + linked_list_node_t* tempNode = currentNode->_next; + + if (currentNode->_indexData == NULL) { + currentNode = tempNode; + continue; + } + + if (indexData->_index != currentNode->_indexData->_index) { + currentNode = tempNode; + continue; + } + + // ....................................................................... + // Just before we remove the data and associated data, go to the index + // and indicate that we are about to remove the node from the linked list + // ....................................................................... + + indexData->_lastPass = 254; + result = indexData->_collectGarbage(indexData); + if (result != TRI_ERROR_NO_ERROR) { + LOG_TRACE("the index garbage collector called the callback which returend error %d", result); + } + + + // ....................................................................... + // Actually remove the node from the linked list here + // ....................................................................... + + result = ExciseNode(currentNode); + if (result != TRI_ERROR_NO_ERROR) { + LOG_TRACE("the index garbage collector function ExcisENode returned with error %d", result); + finished = false; + currentNode = tempNode; + continue; + } + + + // ....................................................................... 
+ // Inform the index that the node has been removed from the linked list + // ....................................................................... + + indexData->_lastPass = 255; + result = indexData->_collectGarbage(indexData); + if (result != TRI_ERROR_NO_ERROR) { + LOG_TRACE("the index garbage collector called the callback which returend error %d", result); + } + + TRI_Free(TRI_UNKNOWN_MEM_ZONE, currentNode); + currentNode = tempNode; + + } // end of while loop + + if (!finished) { + goto CAS_LOOP; + } + + } // end of CAS_LOOP + + LOG_TRACE("the index garbage collector has completed expunging nodes for a given index"); + + return result; +} + //////////////////////////////////////////////////////////////////////////////// /// @} //////////////////////////////////////////////////////////////////////////////// @@ -288,10 +463,9 @@ int TRI_AddToIndexGC(TRI_index_gc_t* indexData) { //////////////////////////////////////////////////////////////////////////////// -int ExciseNode(linked_list_node_t* nodeToExcise) { - int result = TRI_ERROR_NO_ERROR; - return result; -} +//////////////////////////////////////////////////////////////////////////////// +// Creates and initialises the linked list used by the garbage collector +//////////////////////////////////////////////////////////////////////////////// void InitialiseStaticLinkedList(void) { @@ -310,6 +484,7 @@ void InitialiseStaticLinkedList(void) { // .......................................................................... INDEX_GC_LINKED_LIST->_listFlag = INDEX_GC_LIST_FORBIDDEN_FLAG; + INDEX_GC_LINKED_LIST->_size = 0; (INDEX_GC_LINKED_LIST->_startNode)._indexData = NULL; @@ -327,7 +502,7 @@ void InitialiseStaticLinkedList(void) { // 'Unlock' the list so that other process can use it // .......................................................................... 
- if (TRI_CompareAndSwapIntegerUInt32 (&(INDEX_GC_LINKED_LIST->_listFlag), + if (!TRI_CompareAndSwapIntegerUInt32 (&(INDEX_GC_LINKED_LIST->_listFlag), INDEX_GC_LIST_FORBIDDEN_FLAG, INDEX_GC_LIST_NORMAL_FLAG) ) { LOG_FATAL_AND_EXIT("Index garbage collector can not start - CAS failure"); @@ -343,6 +518,7 @@ void InnerThreadLoop (bool* goToSleep) { uint64_t lastCompleteGlobalTransID = 0; TRI_transaction_global_stats_t* stats = NULL; int result; + TRI_vocbase_t* vocbase = (TRI_vocbase_t*)(INDEX_GC_DATA); stats = TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_transaction_global_stats_t), true); if (stats == NULL) { @@ -405,42 +581,85 @@ void InnerThreadLoop (bool* goToSleep) { indexData = currentNode->_indexData; + // ........................................................................ + // Check whether or not we can actually execute the call back for that + // particular pass. + // ........................................................................ + + /* TODO: this needs to be fixed with the transaction handling stuff + if (stats->oldestGlobalTransID <= indexData->_transID) { + currentNode = currentNode->_next; + continue; + } + */ + + // ........................................................................ // First lets check whether we have actually finished with this node. // ........................................................................ 
- if (indexData->_passes == indexData->_lastPass) { + if (indexData->_lastPass < indexData->_passes) { + ++(indexData->_lastPass); + result = indexData->_collectGarbage(indexData); + if (result != TRI_ERROR_NO_ERROR) { + LOG_TRACE("the index garbage collector called the callback which returend error %d", result); + if (result == TRI_WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_CAS_FAILURE) { + // no harm done we simply try again later + --(indexData->_lastPass); + } + } + currentNode = currentNode->_next; + } + + + else if (indexData->_passes == indexData->_lastPass) { + + // ....................................................................... + // We have essentially finished with the node and are about + // to remove it from the linked list here. + // ....................................................................... + + + // ....................................................................... + // Just before we remove the data and associated data, go to the index + // and indicate that we are about to remove the node from the linked list + // ....................................................................... + indexData->_lastPass = 254; result = indexData->_collectGarbage(indexData); if (result != TRI_ERROR_NO_ERROR) { LOG_TRACE("the index garbage collector called the callback which returend error %d", result); } + + + // ....................................................................... + // Actually remove the node from the linked list here + // ....................................................................... + result = ExciseNode(currentNode); if (result != TRI_ERROR_NO_ERROR) { LOG_TRACE("the index garbage collector function ExcisENode returned with error %d", result); } + + + // ....................................................................... + // Inform the index that the node has been removed from the linked list + // .......................................................................
+ indexData->_lastPass = 255; result = indexData->_collectGarbage(indexData); if (result != TRI_ERROR_NO_ERROR) { LOG_TRACE("the index garbage collector called the callback which returend error %d", result); } + + tempNode = currentNode->_next; TRI_Free(TRI_UNKNOWN_MEM_ZONE, currentNode); currentNode = tempNode; - continue; - } - + } - // ........................................................................ - // Check whether or not we can actually execute the call back for that - // particular pass. - // ........................................................................ - - if (lastCompleteGlobalTransID <= indexData->_transID) { - currentNode = currentNode->_next; - continue; - } - } + } // end of while loop + } @@ -450,35 +669,20 @@ void RemoveLinkedList(void) { LOG_TRACE("the index garbage collector has commenced removing all allocated memory"); currentNode = &(INDEX_GC_LINKED_LIST->_startNode); - if ( currentNode->_next != NULL ) { - currentNode = currentNode->_next; - } - else { - currentNode = NULL; - } - while (currentNode != NULL) { - int result; + int result = TRI_ERROR_NO_ERROR; + linked_list_node_t* tempNode = currentNode->_next; - if (currentNode->_indexData == NULL) { - continue; - } - - currentNode->_indexData->_lastPass = 255; - - result = currentNode->_indexData->_collectGarbage(currentNode->_indexData); - if (result != TRI_ERROR_NO_ERROR) { - LOG_TRACE("the index garbage collector executed the callback and has returned error code %d",result); - } - - if ( currentNode->_next != NULL ) { - linked_list_node_t* tempNode = currentNode->_next; + if (currentNode->_indexData != NULL) { + currentNode->_indexData->_lastPass = 255; + result = currentNode->_indexData->_collectGarbage(currentNode->_indexData); + if (result != TRI_ERROR_NO_ERROR) { + LOG_TRACE("the index garbage collector executed the callback and has returned error code %d",result); + } TRI_Free(TRI_UNKNOWN_MEM_ZONE, currentNode); - currentNode = tempNode; - } - else { - currentNode = 
NULL; } + + currentNode = tempNode; } TRI_Free(TRI_UNKNOWN_MEM_ZONE, INDEX_GC_LINKED_LIST); @@ -488,26 +692,298 @@ void RemoveLinkedList(void) { void SetForbiddenFlag(void) { int counter = 0; - LOG_TRACE("the index garbage collector is attempting to block insertions"); + //LOG_TRACE("the index garbage collector is attempting to block insertions"); - while (counter < MAX_INDEX_GC_CAS_RETRIES) { + while (counter < MAX_INDEX_GC_CAS_RETRIES) { if (TRI_CompareAndSwapIntegerUInt32 (&(INDEX_GC_LINKED_LIST->_listFlag), INDEX_GC_LIST_NORMAL_FLAG, INDEX_GC_LIST_FORBIDDEN_FLAG) ) { counter = -1; break; } - usleep(1000); + usleep(CAS_FAILURE_SLEEP_TIME); } if (counter == -1) { - LOG_TRACE("the index garbage collector has succeeded in blocking insertions"); + //LOG_TRACE("the index garbage collector has succeeded in blocking insertions"); } else { LOG_TRACE("the index garbage collector has failed in blocking insertions"); } } +void UnsetForbiddenFlag(void) { + int counter = 0; + + //LOG_TRACE("the index garbage collector is attempting to unblock insertions"); + + while (counter < MAX_INDEX_GC_CAS_RETRIES) { + if (TRI_CompareAndSwapIntegerUInt32 (&(INDEX_GC_LINKED_LIST->_listFlag), + INDEX_GC_LIST_FORBIDDEN_FLAG, + INDEX_GC_LIST_NORMAL_FLAG) ) { + counter = -1; + break; + } + usleep(CAS_FAILURE_SLEEP_TIME); + } + + if (counter == -1) { + //LOG_TRACE("the index garbage collector has succeeded in unblocking insertions"); + } + else { + LOG_TRACE("the index garbage collector has failed in unblocking insertions\n"); + } +} + + +//////////////////////////////////////////////////////////////////////////////////// +// Implementation of static functions for insertion of a node +//////////////////////////////////////////////////////////////////////////////////// + +static int InsertNode(linked_list_node_t* insertNode) { + int casCounter = 0; + int bricked = 0; + int swaped = 0; + int result; + linked_list_node_t* nextNode; + linked_list_node_t* prevNode; + + CAS_LOOP: { + + // 
.......................................................................... + // We can not assign these pointers outside this loop, since these may change + // any time with threads busy inserting entries into the list. + // .......................................................................... + + insertNode->_next = &(INDEX_GC_LINKED_LIST->_endNode); + nextNode = (linked_list_node_t*)(insertNode->_next); + + insertNode->_prev = (linked_list_node_t*)(nextNode->_prev); + prevNode = (linked_list_node_t*)(insertNode->_prev); + + + if (casCounter > 1) { + usleep(CAS_FAILURE_SLEEP_TIME); + } + + if (casCounter > MAX_INDEX_GC_CAS_RETRIES) { + LOG_ERROR("max cas loop exceeded"); + return TRI_ERROR_INTERNAL; + } + + bricked = InsertNodeBrick(prevNode, nextNode); + if (bricked != 2) { + int tempResult = InsertNodeBrickUndo(prevNode, nextNode, bricked); + if (tempResult != TRI_ERROR_NO_ERROR) { + return TRI_ERROR_INTERNAL; + } + ++casCounter; + goto CAS_LOOP; + } + + swaped = InsertNodeSwapPointers(insertNode, prevNode, nextNode); + if (swaped != 2) { + int tempResult1 = InsertNodeBrickUndo(prevNode, nextNode, bricked); + int tempResult2 = InsertNodeSwapPointersUndo(insertNode, prevNode, nextNode, swaped); + if ((tempResult1 != TRI_ERROR_NO_ERROR) || (tempResult2 != TRI_ERROR_NO_ERROR)) { + return TRI_ERROR_INTERNAL; + } + ++casCounter; + goto CAS_LOOP; + } + + result = InsertNodeBrickUndo(prevNode, nextNode, bricked); + if (result != TRI_ERROR_NO_ERROR) { + return TRI_ERROR_INTERNAL; + } + ++INDEX_GC_LINKED_LIST->_size; + + } // end of CAS_LOOP + + return TRI_ERROR_NO_ERROR; +} + +static int InsertNodeBrick(linked_list_node_t* prevNode, linked_list_node_t* nextNode) { + bool ok; + + ok = TRI_CompareAndSwapIntegerUInt32 (&(prevNode->_nodeFlag), INDEX_GC_NODE_NORMAL_FLAG, INDEX_GC_NODE_BRICKED_FLAG); + if (!ok) { return 0; } + + ok = TRI_CompareAndSwapIntegerUInt32 (&(nextNode->_nodeFlag), INDEX_GC_NODE_NORMAL_FLAG, INDEX_GC_NODE_BRICKED_FLAG); + if (!ok) { return 1; } + 
+ return 2; +} + +static int InsertNodeBrickUndo(linked_list_node_t* prevNode, linked_list_node_t* nextNode, int bricked) { + bool ok; + + if (bricked > 0) { + ok = TRI_CompareAndSwapIntegerUInt32 (&(prevNode->_nodeFlag), INDEX_GC_NODE_BRICKED_FLAG, INDEX_GC_NODE_NORMAL_FLAG); + if (bricked > 1) { + ok = (TRI_CompareAndSwapIntegerUInt32 (&(nextNode->_nodeFlag), INDEX_GC_NODE_BRICKED_FLAG, INDEX_GC_NODE_NORMAL_FLAG)) && (ok); + } + if (!ok) { + LOG_ERROR("InsertNodeBrickUndo failed here"); + return TRI_ERROR_INTERNAL; + } + } + return TRI_ERROR_NO_ERROR; +} + + +static int InsertNodeSwapPointers(linked_list_node_t* nodeToInsert, linked_list_node_t* prevNode, linked_list_node_t* nextNode) { + bool ok; + + ok = TRI_CompareAndSwapPointer(&(prevNode->_next), nextNode, nodeToInsert); + if (!ok) { return 0; } + + ok = TRI_CompareAndSwapPointer(&(nextNode->_prev), prevNode, nodeToInsert); + if (!ok) { return 1; } + + return 2; +} + +static int InsertNodeSwapPointersUndo(linked_list_node_t* nodeToInsert, linked_list_node_t* prevNode, linked_list_node_t* nextNode, int swaped) { + bool ok; + + if (swaped > 0) { + ok = TRI_CompareAndSwapPointer(&(prevNode->_next), nodeToInsert, nextNode); + if (swaped > 1) { + ok = ok && TRI_CompareAndSwapPointer(&(nextNode->_prev), nodeToInsert, prevNode); + } + if (!ok) { + LOG_ERROR("InsertNodeSwapPointersUndo failed here"); + return TRI_ERROR_INTERNAL; + } + } + return TRI_ERROR_NO_ERROR; +} + + + +//////////////////////////////////////////////////////////////////////////////////// +// Implementation of static functions for removal of a node +//////////////////////////////////////////////////////////////////////////////////// + +static int ExciseNode(linked_list_node_t* nodeToExcise) { + int result = TRI_ERROR_NO_ERROR; + int casCounter = 0; + int bricked = 0; + int swaped = 0; + linked_list_node_t* nextNode; + linked_list_node_t* prevNode; + + + SetForbiddenFlag(); + + CAS_LOOP: { + + result = TRI_ERROR_NO_ERROR; + nextNode = 
nodeToExcise->_next; + prevNode = nodeToExcise->_prev; + + if (casCounter > 1) { + usleep(CAS_FAILURE_SLEEP_TIME); + } + + if (casCounter > MAX_INDEX_GC_CAS_RETRIES) { + LOG_ERROR("max cas loop exceeded"); + return TRI_ERROR_INTERNAL; + } + + bricked = ExciseNodeBrick(nodeToExcise, prevNode, nextNode); + + if (bricked != 3) { + result = ExciseNodeBrickUndo(nodeToExcise, prevNode, nextNode, bricked); + if (result != TRI_ERROR_NO_ERROR) { + return result; + } + ++casCounter; + goto CAS_LOOP; + } + + + swaped = ExciseNodeSwapPointers(nodeToExcise, prevNode, nextNode); + if (swaped != 2) { + ExciseNodeBrickUndo(nodeToExcise, prevNode, nextNode, bricked); + ExciseNodeSwapPointersUndo(nodeToExcise, prevNode, nextNode, swaped); + ++casCounter; + goto CAS_LOOP; + } + + --INDEX_GC_LINKED_LIST->_size; + ExciseNodeBrickUndo(nodeToExcise, prevNode, nextNode, bricked); + + } // end of CAS_LOOP + + UnsetForbiddenFlag(); + + return result; +} + +static int ExciseNodeBrick(linked_list_node_t* nodeToExcise, linked_list_node_t* prevNode, linked_list_node_t* nextNode) { + bool ok; + + ok = TRI_CompareAndSwapIntegerUInt32 (&(nodeToExcise->_nodeFlag), INDEX_GC_NODE_NORMAL_FLAG, INDEX_GC_NODE_BRICKED_FLAG); + if (!ok) { return 0; } + + ok = TRI_CompareAndSwapIntegerUInt32 (&(prevNode->_nodeFlag), INDEX_GC_NODE_NORMAL_FLAG, INDEX_GC_NODE_BRICKED_FLAG); + if (!ok) { return 1; } + + ok = TRI_CompareAndSwapIntegerUInt32 (&(nextNode->_nodeFlag), INDEX_GC_NODE_NORMAL_FLAG, INDEX_GC_NODE_BRICKED_FLAG); + if (!ok) { return 2; } + + return 3; +} + +static int ExciseNodeBrickUndo(linked_list_node_t* nodeToExcise, linked_list_node_t* prevNode, linked_list_node_t* nextNode, int bricked) { + bool ok; + + if (bricked > 0) { + ok = TRI_CompareAndSwapIntegerUInt32 (&(nodeToExcise->_nodeFlag), INDEX_GC_NODE_BRICKED_FLAG, INDEX_GC_NODE_NORMAL_FLAG); + if (bricked > 1) { + ok = ok && TRI_CompareAndSwapIntegerUInt32 (&(prevNode->_nodeFlag), INDEX_GC_NODE_BRICKED_FLAG, INDEX_GC_NODE_NORMAL_FLAG); + if 
(bricked > 2) { + ok = TRI_CompareAndSwapIntegerUInt32 (&(nextNode->_nodeFlag), INDEX_GC_NODE_BRICKED_FLAG, INDEX_GC_NODE_NORMAL_FLAG); + } + } + if (!ok) { + LOG_ERROR("ExciseNodeBrickUndo failed here"); + return TRI_ERROR_INTERNAL; + } + } + return TRI_ERROR_NO_ERROR; +} + +static int ExciseNodeSwapPointers(linked_list_node_t* nodeToExcise, linked_list_node_t* prevNode, linked_list_node_t* nextNode) { + bool ok; + + ok = TRI_CompareAndSwapPointer(&(prevNode->_next), nodeToExcise, nextNode); + if (!ok) { return 0; } + + ok = TRI_CompareAndSwapPointer(&(nextNode->_prev), nodeToExcise, prevNode); + if (!ok) { return 1; } + + return 2; +} + +static int ExciseNodeSwapPointersUndo(linked_list_node_t* nodeToExcise, linked_list_node_t* prevNode, linked_list_node_t* nextNode, int swaped) { + bool ok; + + if (swaped > 0) { + ok = TRI_CompareAndSwapPointer(&(prevNode->_next), nextNode, nodeToExcise); + if (swaped > 1) { + ok = ok && TRI_CompareAndSwapPointer(&(nextNode->_prev), prevNode, nodeToExcise); + } + if (!ok) { + LOG_ERROR("ExciseNodeSwapPointersUndo failed here"); + return TRI_ERROR_INTERNAL; + } + } + return TRI_ERROR_NO_ERROR; +} + //////////////////////////////////////////////////////////////////////////////// /// @} //////////////////////////////////////////////////////////////////////////////// diff --git a/arangod/VocBase/index-garbage-collector.h b/arangod/VocBase/index-garbage-collector.h index 799b259908..f25eed17f7 100644 --- a/arangod/VocBase/index-garbage-collector.h +++ b/arangod/VocBase/index-garbage-collector.h @@ -46,22 +46,25 @@ extern "C" { /// @{ //////////////////////////////////////////////////////////////////////////////// -struct TRI_index_s; struct TRI_transaction_context_s; typedef struct TRI_index_gc_s { - struct TRI_index_s* _index; // index which requires rubbish collection - uint8_t _passes; // the number of passes to complete the rubbish collection - uint8_t _lastPass; // the last pass performed (_lastPass = 0, implies no passes 
performed) - uint64_t _transID; // the transaction id which must have completed before the current pass can come into effect - void* _data; // storage of data which may be required by the index + void* _index; // struct TRI_index_s* index which requires rubbish collection + uint8_t _passes; // the number of passes to complete the rubbish collection + uint8_t _lastPass; // the last pass performed (_lastPass = 0, implies no passes performed) + uint64_t _transID; // the transaction id which must have completed before the current pass can come into effect + void* _data; // storage of data which may be required by the index int (*_collectGarbage) (struct TRI_index_gc_s*); // callback which actually does the work (defined where the index is defined) } TRI_index_gc_t; -int TRI_AddToIndexGC (TRI_index_gc_t*); // adds an item to the rubbish collection linked list +int TRI_AddToIndexGC (TRI_index_gc_t*); // adds an item to the rubbish collection linked list -void TRI_IndexGCVocBase (void*); // essentially a loop called by the thread and runs 'forever' +int TRI_ExpungeIndexGC (TRI_index_gc_t*); // removes all references of an index from the garbage collector + +uint64_t TRI_GetIndexGCSize (void); // returns the number of entries in the linked list used by the garbage collector + +void TRI_IndexGCVocBase (void*); // essentially a loop called by the thread and runs 'forever' //////////////////////////////////////////////////////////////////////////////// /// @brief index garbage collector event loop diff --git a/build_win.h b/build_win.h index 7fa6d151b4..c4a5f677e4 100755 --- a/build_win.h +++ b/build_win.h @@ -1,14 +1,14 @@ #ifdef _WIN64 - #define WINDOWS_ARRANGO_VERSION_NUMBER 1.3.0 + #define WINDOWS_ARRANGO_VERSION_NUMBER 1.4 #ifdef _DEBUG - #define TRIAGENS_VERSION "1.3.0 [WIN64-DEBUG ALPHA 2]" + #define TRIAGENS_VERSION "1.4 [WIN64-DEBUG DEVEL]" #else - #define TRIAGENS_VERSION "1.3.0 [WIN64-RELEASE ALPHA 2]" + #define TRIAGENS_VERSION "1.4 [WIN64-RELEASE DEVEL]" #endif
#else #ifdef _DEBUG - #define TRIAGENS_VERSION "1.3.0 [WIN32-DEBUG ALPHA 2]" + #define TRIAGENS_VERSION "1.4 [WIN32-DEBUG DEVEL]" #else - #define TRIAGENS_VERSION "1.3.0 [WIN32-RELEASE ALPHA 2]" + #define TRIAGENS_VERSION "1.4 [WIN32-RELEASE DEVEL]" #endif #endif diff --git a/html/admin/js/bootstrap/errors.js b/html/admin/js/bootstrap/errors.js index bc5b79fd05..7132aabfb9 100644 --- a/html/admin/js/bootstrap/errors.js +++ b/html/admin/js/bootstrap/errors.js @@ -167,6 +167,8 @@ "WARNING_ARANGO_INDEX_SKIPLIST_INSERT_CAS_FAILURE" : { "code" : 3304, "message" : "skiplist index insertion warning - CAS failure while attempting to insert document" }, "WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_MISSING" : { "code" : 3311, "message" : "skiplist index remove failure - item missing in index" }, "WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_CAS_FAILURE" : { "code" : 3313, "message" : "skiplist index remove warning - CAS failure while attempting to remove document" }, + "WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_POST_INSERTED" : { "code" : 3315, "message" : "skiplist index remove failure - item inserted post this transaction in the index" }, + "WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_PRIOR_REMOVED" : { "code" : 3317, "message" : "skiplist index remove failure - item removed prior this transaction in the index" }, "WARNING_ARANGO_INDEX_BITARRAY_DOCUMENT_ATTRIBUTE_MISSING" : { "code" : 3400, "message" : "bitarray index insertion warning - attribute missing in document" }, "WARNING_ARANGO_INDEX_BITARRAY_UPDATE_ATTRIBUTE_MISSING" : { "code" : 3402, "message" : "bitarray index update warning - attribute missing in revised document" }, "WARNING_ARANGO_INDEX_BITARRAY_REMOVE_ITEM_MISSING" : { "code" : 3411, "message" : "bitarray index remove failure - item missing in index" }, @@ -176,7 +178,8 @@ "RESULT_KEY_EXISTS" : { "code" : 10000, "message" : "element not inserted into structure, because key already exists" }, "RESULT_ELEMENT_EXISTS" : { "code" : 10001, "message" : "element not inserted 
into structure, because it already exists" }, "RESULT_KEY_NOT_FOUND" : { "code" : 10002, "message" : "key not found in structure" }, - "RESULT_ELEMENT_NOT_FOUND" : { "code" : 10003, "message" : "element not found in structure" } + "RESULT_ELEMENT_NOT_FOUND" : { "code" : 10003, "message" : "element not found in structure" }, + "WARNING_ARANGO_INDEX_GARBAGE_COLLECTOR_SHUTDOWN" : { "code" : 11000, "message" : "the index garbage collector has shutdown and no further entries can be processed" } }; }()); diff --git a/js/common/bootstrap/errors.js b/js/common/bootstrap/errors.js index bc5b79fd05..7132aabfb9 100644 --- a/js/common/bootstrap/errors.js +++ b/js/common/bootstrap/errors.js @@ -167,6 +167,8 @@ "WARNING_ARANGO_INDEX_SKIPLIST_INSERT_CAS_FAILURE" : { "code" : 3304, "message" : "skiplist index insertion warning - CAS failure while attempting to insert document" }, "WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_MISSING" : { "code" : 3311, "message" : "skiplist index remove failure - item missing in index" }, "WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_CAS_FAILURE" : { "code" : 3313, "message" : "skiplist index remove warning - CAS failure while attempting to remove document" }, + "WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_POST_INSERTED" : { "code" : 3315, "message" : "skiplist index remove failure - item inserted post this transaction in the index" }, + "WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_PRIOR_REMOVED" : { "code" : 3317, "message" : "skiplist index remove failure - item removed prior this transaction in the index" }, "WARNING_ARANGO_INDEX_BITARRAY_DOCUMENT_ATTRIBUTE_MISSING" : { "code" : 3400, "message" : "bitarray index insertion warning - attribute missing in document" }, "WARNING_ARANGO_INDEX_BITARRAY_UPDATE_ATTRIBUTE_MISSING" : { "code" : 3402, "message" : "bitarray index update warning - attribute missing in revised document" }, "WARNING_ARANGO_INDEX_BITARRAY_REMOVE_ITEM_MISSING" : { "code" : 3411, "message" : "bitarray index remove failure - item missing in 
index" }, @@ -176,7 +178,8 @@ "RESULT_KEY_EXISTS" : { "code" : 10000, "message" : "element not inserted into structure, because key already exists" }, "RESULT_ELEMENT_EXISTS" : { "code" : 10001, "message" : "element not inserted into structure, because it already exists" }, "RESULT_KEY_NOT_FOUND" : { "code" : 10002, "message" : "key not found in structure" }, - "RESULT_ELEMENT_NOT_FOUND" : { "code" : 10003, "message" : "element not found in structure" } + "RESULT_ELEMENT_NOT_FOUND" : { "code" : 10003, "message" : "element not found in structure" }, + "WARNING_ARANGO_INDEX_GARBAGE_COLLECTOR_SHUTDOWN" : { "code" : 11000, "message" : "the index garbage collector has shutdown and no further entries can be processed" } }; }()); diff --git a/lib/BasicsC/errors.dat b/lib/BasicsC/errors.dat index 11914ec6fd..edd9e754cf 100755 --- a/lib/BasicsC/errors.dat +++ b/lib/BasicsC/errors.dat @@ -251,6 +251,8 @@ WARNING_ARANGO_INDEX_SKIPLIST_UPDATE_ATTRIBUTE_MISSING,3302,"skiplist index upda WARNING_ARANGO_INDEX_SKIPLIST_INSERT_CAS_FAILURE,3304,"skiplist index insertion warning - CAS failure while attempting to insert document","Will be raised when an attempt to insert a document into a skiplist index fails due to repeated CAS failures/clashes." WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_MISSING,3311,"skiplist index remove failure - item missing in index","Will be raised when an attempt to remove a document from a skiplist index fails when document can not be located within that index." WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_CAS_FAILURE,3313,"skiplist index remove warning - CAS failure while attempting to remove document","Will be raised when an attempt to remove a document into a skiplist index fails due to repeated CAS failures/clashes." 
+WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_POST_INSERTED,3315,"skiplist index remove failure - item inserted post this transaction in the index","Will be raised when an attempt to remove a document from a skiplist index fails due to the fact that the document to be removed was inserted in a transaction post this removal transaction." +WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_PRIOR_REMOVED,3317,"skiplist index remove failure - item removed prior this transaction in the index","Will be raised when an attempt to remove a document from a skiplist index fails due to the fact that the document to be removed was removed in a transaction prior this removal transaction." WARNING_ARANGO_INDEX_BITARRAY_DOCUMENT_ATTRIBUTE_MISSING,3400,"bitarray index insertion warning - attribute missing in document","Will be raised when an attempt to insert a document into a bitarray index is caused by in the document not having one or more attributes which are required by the bitarray index." @@ -269,3 +271,9 @@ RESULT_KEY_EXISTS,10000,"element not inserted into structure, because key alread RESULT_ELEMENT_EXISTS,10001,"element not inserted into structure, because it already exists","Will be returned if the element was not insert because it already exists." RESULT_KEY_NOT_FOUND,10002,"key not found in structure","Will be returned if the key was not found in the structure." RESULT_ELEMENT_NOT_FOUND,10003,"element not found in structure","Will be returned if the element was not found in the structure." 
+ +################################################################################ +## WARNING when a thread is in a state of shutdown +################################################################################ + +WARNING_ARANGO_INDEX_GARBAGE_COLLECTOR_SHUTDOWN,11000,"the index garbage collector has shutdown and no further entries can be processed","Will be raised when an attempt to add an item to the index garbage collector fails due to the fact that the state of the collector is in shutdown mode." diff --git a/lib/BasicsC/files.c b/lib/BasicsC/files.c index 4534594454..48695e5d36 100644 --- a/lib/BasicsC/files.c +++ b/lib/BasicsC/files.c @@ -1080,7 +1080,7 @@ int TRI_VerifyLockFile (char const* filename) { TRI_CLOSE(fd); // file empty or pid too long - if (n == 0 || n == sizeof(buf)) { + if (n == 0 || n == sizeof(buffer)) { return TRI_set_errno(TRI_ERROR_ILLEGAL_NUMBER); } diff --git a/lib/BasicsC/locks-posix.c b/lib/BasicsC/locks-posix.c index d43543cca0..e80417782c 100644 --- a/lib/BasicsC/locks-posix.c +++ b/lib/BasicsC/locks-posix.c @@ -574,7 +574,7 @@ bool TRI_CompareAndSwapIntegerInt32 (volatile int32_t* theValue, int32_t oldValu #if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 return OSAtomicCompareAndSwap32Barrier(oldValue, newValue, theValue); #elif (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 - return __sync_val_compare_and_swap(theValue, oldValue, newValue); + return __sync_bool_compare_and_swap(theValue, oldValue, newValue); #else #error No TRI_CompareAndSwapIntegerInt32 implementation defined #endif @@ -584,7 +584,7 @@ bool TRI_CompareIntegerInt32 (volatile int32_t* theValue, int32_t oldValue) { #if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 return OSAtomicCompareAndSwap32Barrier(oldValue, oldValue, theValue); #elif (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 - return __sync_val_compare_and_swap(theValue, oldValue, oldValue); + return 
__sync_bool_compare_and_swap(theValue, oldValue, oldValue); #else #error No TRI_CompareAndSwapIntegerInt32 implementation defined #endif @@ -594,7 +594,7 @@ bool TRI_CompareAndSwapIntegerUInt32 (volatile uint32_t* theValue, uint32_t oldV #if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 return OSAtomicCompareAndSwap32Barrier((int32_t)(oldValue), (int32_t)(newValue), (volatile int32_t*)(theValue)); #elif (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 - return __sync_val_compare_and_swap(theValue, oldValue, newValue); + return __sync_bool_compare_and_swap(theValue, oldValue, newValue); #else #error No TRI_CompareAndSwapIntegerUInt32 implementation defined #endif @@ -604,7 +604,7 @@ bool TRI_CompareIntegerUInt32 (volatile uint32_t* theValue, uint32_t oldValue) { #if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 return OSAtomicCompareAndSwap32Barrier((int32_t)(oldValue), (int32_t)(oldValue), (volatile int32_t*)(theValue)); #elif (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 - return __sync_val_compare_and_swap(theValue, oldValue, oldValue); + return __sync_bool_compare_and_swap(theValue, oldValue, oldValue); #else #error No TRI_CompareAndSwapIntegerUInt32 implementation defined #endif @@ -618,7 +618,7 @@ bool TRI_CompareAndSwapIntegerInt64 (volatile int64_t* theValue, int64_t oldValu #if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 return OSAtomicCompareAndSwap64Barrier(oldValue, newValue, theValue); #elif (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 - return __sync_val_compare_and_swap(theValue, oldValue, newValue); + return __sync_bool_compare_and_swap(theValue, oldValue, newValue); #else #error No TRI_CompareAndSwapIntegerInt64 implementation defined #endif @@ -628,7 +628,7 @@ bool TRI_CompareIntegerInt64 (volatile int64_t* theValue, int64_t oldValue) { #if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 return OSAtomicCompareAndSwap64Barrier(oldValue, 
oldValue, theValue); #elif (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 - return __sync_val_compare_and_swap(theValue, oldValue, oldValue); + return __sync_bool_compare_and_swap(theValue, oldValue, oldValue); #else #error No TRI_CompareAndSwapIntegerInt64 implementation defined #endif @@ -638,7 +638,7 @@ bool TRI_CompareAndSwapIntegerUInt64 (volatile uint64_t* theValue, uint64_t oldV #if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 return OSAtomicCompareAndSwap64Barrier((int64_t)(oldValue), (int64_t)(newValue), (volatile int64_t*)(theValue)); #elif (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 - return __sync_val_compare_and_swap(theValue, oldValue, newValue); + return __sync_bool_compare_and_swap(theValue, oldValue, newValue); #else #error No TRI_CompareAndSwapIntegerUInt64 implementation defined #endif @@ -648,7 +648,7 @@ bool TRI_CompareIntegerUInt64 (volatile uint64_t* theValue, uint64_t oldValue) { #if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 return OSAtomicCompareAndSwap64Barrier((int64_t)(oldValue), (int64_t)(oldValue), (volatile int64_t*)(theValue)); #elif (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 - return __sync_val_compare_and_swap(theValue, oldValue, oldValue); + return __sync_bool_compare_and_swap(theValue, oldValue, oldValue); #else #error No TRI_CompareAndSwapIntegerUInt64 implementation defined #endif @@ -662,7 +662,7 @@ bool TRI_CompareAndSwapPointer(void* volatile* theValue, void* oldValue, void* n #if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 return OSAtomicCompareAndSwapPtrBarrier(oldValue, newValue, theValue); #elif (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 - return __sync_val_compare_and_swap(theValue, oldValue, newValue); + return __sync_bool_compare_and_swap(theValue, oldValue, newValue); #else #error No TRI_CompareAndSwapPointer implementation defined #endif @@ -672,7 +672,7 @@ bool 
TRI_ComparePointer(void* volatile* theValue, void* oldValue) { #if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 return OSAtomicCompareAndSwapPtrBarrier(oldValue, oldValue, theValue); #elif (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 - return __sync_val_compare_and_swap(theValue, oldValue, oldValue); + return __sync_bool_compare_and_swap(theValue, oldValue, oldValue); #else #error No TRI_CompareAndSwapPointer implementation defined #endif diff --git a/lib/BasicsC/voc-errors.c b/lib/BasicsC/voc-errors.c index 2f4bf0f5da..0e3cc28d6b 100644 --- a/lib/BasicsC/voc-errors.c +++ b/lib/BasicsC/voc-errors.c @@ -163,6 +163,8 @@ void TRI_InitialiseErrorMessages (void) { REG_ERROR(WARNING_ARANGO_INDEX_SKIPLIST_INSERT_CAS_FAILURE, "skiplist index insertion warning - CAS failure while attempting to insert document"); REG_ERROR(WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_MISSING, "skiplist index remove failure - item missing in index"); REG_ERROR(WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_CAS_FAILURE, "skiplist index remove warning - CAS failure while attempting to remove document"); + REG_ERROR(WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_POST_INSERTED, "skiplist index remove failure - item inserted post this transaction in the index"); + REG_ERROR(WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_PRIOR_REMOVED, "skiplist index remove failure - item removed prior this transaction in the index"); REG_ERROR(WARNING_ARANGO_INDEX_BITARRAY_DOCUMENT_ATTRIBUTE_MISSING, "bitarray index insertion warning - attribute missing in document"); REG_ERROR(WARNING_ARANGO_INDEX_BITARRAY_UPDATE_ATTRIBUTE_MISSING, "bitarray index update warning - attribute missing in revised document"); REG_ERROR(WARNING_ARANGO_INDEX_BITARRAY_REMOVE_ITEM_MISSING, "bitarray index remove failure - item missing in index"); @@ -173,6 +175,7 @@ void TRI_InitialiseErrorMessages (void) { REG_ERROR(RESULT_ELEMENT_EXISTS, "element not inserted into structure, because it already exists"); 
REG_ERROR(RESULT_KEY_NOT_FOUND, "key not found in structure"); REG_ERROR(RESULT_ELEMENT_NOT_FOUND, "element not found in structure"); + REG_ERROR(WARNING_ARANGO_INDEX_GARBAGE_COLLECTOR_SHUTDOWN, "the index garbage collector has shutdown and no further entries can be processed"); } //////////////////////////////////////////////////////////////////////////////// diff --git a/lib/BasicsC/voc-errors.h b/lib/BasicsC/voc-errors.h index 6e71418dff..41c0e9dcc2 100644 --- a/lib/BasicsC/voc-errors.h +++ b/lib/BasicsC/voc-errors.h @@ -371,6 +371,14 @@ extern "C" { /// - 3313: @LIT{skiplist index remove warning - CAS failure while attempting to remove document} /// Will be raised when an attempt to remove a document into a skiplist index /// fails due to repeated CAS failures/clashes. +/// - 3315: @LIT{skiplist index remove failure - item inserted post this transaction in the index} +/// Will be raised when an attempt to remove a document from a skiplist index +/// fails due to the fact that the document to be removed was inserted in a +/// transaction post this removal transaction. +/// - 3317: @LIT{skiplist index remove failure - item removed prior this transaction in the index} +/// Will be raised when an attempt to remove a document from a skiplist index +/// fails due to the fact that the document to be removed was removed in a +/// transaction prior this removal transaction. /// - 3400: @LIT{bitarray index insertion warning - attribute missing in document} /// Will be raised when an attempt to insert a document into a bitarray index /// is caused by in the document not having one or more attributes which are @@ -401,6 +409,10 @@ extern "C" { /// Will be returned if the key was not found in the structure. /// - 10003: @LIT{element not found in structure} /// Will be returned if the element was not found in the structure. 
+/// - 11000: @LIT{the index garbage collector has shutdown and no further entries can be processed} +/// Will be raised when an attempt to add an item to the index garbage +/// collector fails due to the fact that the state of the collector is in +/// shutdown mode. //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// @@ -1989,6 +2001,32 @@ void TRI_InitialiseErrorMessages (void); #define TRI_WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_CAS_FAILURE (3313) +//////////////////////////////////////////////////////////////////////////////// +/// @brief 3315: WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_POST_INSERTED +/// +/// skiplist index remove failure - item inserted post this transaction in the +/// index +/// +/// Will be raised when an attempt to remove a document from a skiplist index +/// fails due to the fact that the document to be removed was inserted in a +/// transaction post this removal transaction. +//////////////////////////////////////////////////////////////////////////////// + +#define TRI_WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_POST_INSERTED (3315) + +//////////////////////////////////////////////////////////////////////////////// +/// @brief 3317: WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_PRIOR_REMOVED +/// +/// skiplist index remove failure - item removed prior this transaction in the +/// index +/// +/// Will be raised when an attempt to remove a document from a skiplist index +/// fails due to the fact that the document to be removed was removed in a +/// transaction prior this removal transaction. 
+//////////////////////////////////////////////////////////////////////////////// + +#define TRI_WARNING_ARANGO_INDEX_SKIPLIST_REMOVE_ITEM_PRIOR_REMOVED (3317) + //////////////////////////////////////////////////////////////////////////////// /// @brief 3400: WARNING_ARANGO_INDEX_BITARRAY_DOCUMENT_ATTRIBUTE_MISSING /// @@ -2102,6 +2140,19 @@ void TRI_InitialiseErrorMessages (void); #define TRI_RESULT_ELEMENT_NOT_FOUND (10003) +//////////////////////////////////////////////////////////////////////////////// +/// @brief 11000: WARNING_ARANGO_INDEX_GARBAGE_COLLECTOR_SHUTDOWN +/// +/// the index garbage collector has shutdown and no further entries can be +/// processed +/// +/// Will be raised when an attempt to add an item to the index garbage +/// collector fails due to the fact that the state of the collector is in +/// shutdown mode. +//////////////////////////////////////////////////////////////////////////////// + +#define TRI_WARNING_ARANGO_INDEX_GARBAGE_COLLECTOR_SHUTDOWN (11000) + //////////////////////////////////////////////////////////////////////////////// /// @}