From 7f7bdddd2cb165278e7b4e440989d1522929c423 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Sat, 4 Dec 2021 17:45:58 +0100 Subject: [PATCH] DFAContentModel::buildDFA(): fix memory leaks when OutOfMemoryException occurs Fixes GDAL's https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=41335 --- .../validators/common/DFAContentModel.cpp | 572 ++++++++++-------- 1 file changed, 304 insertions(+), 268 deletions(-) diff --git a/src/xercesc/validators/common/DFAContentModel.cpp b/src/xercesc/validators/common/DFAContentModel.cpp index 6d6b124c1..8bd854fda 100644 --- a/src/xercesc/validators/common/DFAContentModel.cpp +++ b/src/xercesc/validators/common/DFAContentModel.cpp @@ -201,6 +201,8 @@ void DFAContentModel::cleanup() delete fLeafList[index]; fMemoryManager->deallocate(fLeafList); //delete [] fLeafList; } + + delete fHeadNode; } @@ -674,6 +676,7 @@ void DFAContentModel::buildDFA(ContentSpecNode* const curNode) fLeafCount * sizeof(ContentSpecNode::NodeTypes) ); //new ContentSpecNode::NodeTypes[fLeafCount]; // + memset(fLeafListType, 0, fLeafCount*sizeof(ContentSpecNode::NodeTypes)); // And, moving onward... We now need to build the follow position sets // for all the nodes. So we allocate an array of pointers to state sets, // one for each leaf node (i.e. each significant DFA position.) @@ -780,10 +783,12 @@ void DFAContentModel::buildDFA(ContentSpecNode* const curNode) ( fLeafCount * sizeof(QName*) ); //new QName*[fLeafCount]; + memset(fElemMap, 0, fLeafCount * sizeof(QName*)); fElemMapType = (ContentSpecNode::NodeTypes*) fMemoryManager->allocate ( fLeafCount * sizeof(ContentSpecNode::NodeTypes) ); //new ContentSpecNode::NodeTypes[fLeafCount]; + memset(fElemMapType, 0, fLeafCount * sizeof(ContentSpecNode::NodeTypes)); fElemMapSize = 0; Occurence** elemOccurenceMap=0; @@ -962,6 +967,7 @@ void DFAContentModel::buildDFA(ContentSpecNode* const curNode) // delete fHeadNode; + fHeadNode = nullptr; // // Init our two state flags. 
Basically the unmarked state counter is @@ -1003,330 +1009,360 @@ void DFAContentModel::buildDFA(ContentSpecNode* const curNode) // the states to do counter. // CMStateSet* newSet = 0; - while (unmarkedState < curState) + + // Lambda that is called after the while (unmarkedState < curState) loop + // in the normal and exception cases. + const auto finalizeProcessingAndCleanup = [&]() { + // Store the current state count in the trans table size + fTransTableSize = curState; + + // + // Fill in the occurrence information for each looping state + // if we're using counters. + // + if (elemOccurenceMap != 0) { + fCountingStates = (Occurence**)fMemoryManager->allocate(fTransTableSize*sizeof(Occurence*)); + memset(fCountingStates, 0, fTransTableSize*sizeof(Occurence*)); + for (unsigned int i = 0; i < fTransTableSize; ++i) { + unsigned int * transitions = fTransTable[i]; + for (unsigned int j = 0; j < fElemMapSize; ++j) { + if (i == transitions[j]) { + Occurence* old=elemOccurenceMap[j]; + if(old!=0) + fCountingStates[i] = new (fMemoryManager) Occurence(old->minOccurs, old->maxOccurs, old->elemIndex); + break; + } + } + } + for (unsigned int j = 0; j < fLeafCount; ++j) { + if(elemOccurenceMap[j]!=0) + delete elemOccurenceMap[j]; + } + fMemoryManager->deallocate(elemOccurenceMap); + } + + // If the last temp set was not stored, then clean it up + if (newSet) + delete newSet; + + // + // Now we can clean up all of the temporary data that was needed during + // DFA build. + // + + for (index = 0; index < fLeafCount; index++) + delete fFollowList[index]; + fMemoryManager->deallocate(fFollowList); //delete [] fFollowList; + fFollowList = NULL; + // - // Get the next unmarked state out of the list of states to do. - // And get the associated transition table entry. + // removeAll() will delete all data, XMLInteger, + // while the keys are to be deleted by the + // deletion of statesToDo. 
// - setT = statesToDo[unmarkedState]; - unsigned int* transEntry = fTransTable[unmarkedState]; + delete stateTable; - // Mark this one final if it contains the EOC state - fFinalStateFlags[unmarkedState] = setT->getBit(fEOCPos); + for (index = 0; index < curState; index++) + delete statesToDo[index]; + fMemoryManager->deallocate(statesToDo); //delete [] statesToDo; - // Bump up the unmarked state count, marking this state done - unmarkedState++; + for (index = 0; index < fLeafCount; index++) + delete fLeafList[index]; + fMemoryManager->deallocate(fLeafList); //delete [] fLeafList; + fLeafList = NULL; #ifdef OPTIMIZED_BUT_STILL_LINEAR_SEARCH - // Optimization(Jan, 2001) - unsigned int sorterIndex = 0; - // Optimization(Jan, 2001) + fMemoryManager->deallocate(leafSorter); //delete [] leafSorter; #endif + for (index=0; index < fElemMapSize; index++) + fMemoryManager->deallocate(leafSorter[index]); + fMemoryManager->deallocate(leafSorter); + }; - // Loop through each possible input symbol in the element map - for (unsigned int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) + try + { + while (unmarkedState < curState) { // - // Build up a set of states which is the union of all of the - // follow sets of DFA positions that are in the current state. If - // we gave away the new set last time through then create a new - // one. Otherwise, zero out the existing one. + // Get the next unmarked state out of the list of states to do. + // And get the associated transition table entry. 
// - if (!newSet) - newSet = new (fMemoryManager) CMStateSet - ( - fLeafCount - , fMemoryManager - ); - else - newSet->zeroBits(); + setT = statesToDo[unmarkedState]; + unsigned int* transEntry = fTransTable[unmarkedState]; + + // Mark this one final if it contains the EOC state + fFinalStateFlags[unmarkedState] = setT->getBit(fEOCPos); + + // Bump up the unmarked state count, marking this state done + unmarkedState++; + +#ifdef OPTIMIZED_BUT_STILL_LINEAR_SEARCH + // Optimization(Jan, 2001) + unsigned int sorterIndex = 0; + // Optimization(Jan, 2001) +#endif + + // Loop through each possible input symbol in the element map + for (unsigned int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) + { + // + // Build up a set of states which is the union of all of the + // follow sets of DFA positions that are in the current state. If + // we gave away the new set last time through then create a new + // one. Otherwise, zero out the existing one. + // + if (!newSet) + newSet = new (fMemoryManager) CMStateSet + ( + fLeafCount + , fMemoryManager + ); + else + newSet->zeroBits(); #ifdef OBSOLETED // unoptimized code - for (unsigned int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) - { - // If this leaf index (DFA position) is in the current set... - if (setT->getBit(leafIndex)) + for (unsigned int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) { - // - // If this leaf is the current input symbol, then we want - // to add its follow list to the set of states to transition - // to from the current state. - // - const QName* leaf = fLeafList[leafIndex]->getElement(); - const QName* element = fElemMap[elemIndex]; - if (fDTD) { - if (XMLString::equals(leaf->getRawName(), element->getRawName())) { - *newSet |= *fFollowList[leafIndex]; + // If this leaf index (DFA position) is in the current set... 
+ if (setT->getBit(leafIndex)) + { + // + // If this leaf is the current input symbol, then we want + // to add its follow list to the set of states to transition + // to from the current state. + // + const QName* leaf = fLeafList[leafIndex]->getElement(); + const QName* element = fElemMap[elemIndex]; + if (fDTD) { + if (XMLString::equals(leaf->getRawName(), element->getRawName())) { + *newSet |= *fFollowList[leafIndex]; + } } - } - else { - if ((leaf->getURI() == element->getURI()) && - (XMLString::equals(leaf->getLocalPart(), element->getLocalPart()))) { - *newSet |= *fFollowList[leafIndex]; + else { + if ((leaf->getURI() == element->getURI()) && + (XMLString::equals(leaf->getLocalPart(), element->getLocalPart()))) { + *newSet |= *fFollowList[leafIndex]; + } } } - } - } // for leafIndex + } // for leafIndex #endif #ifdef OPTIMIZED_BUT_STILL_LINEAR_SEARCH - // Optimization(Jan, 2001) - int leafIndex = leafSorter[sorterIndex++]; + // Optimization(Jan, 2001) + int leafIndex = leafSorter[sorterIndex++]; - while (leafIndex != -1) - { - // If this leaf index (DFA position) is in the current set... - if (setT->getBit(leafIndex)) + while (leafIndex != -1) { - // - // If this leaf is the current input symbol, then we - // want to add its follow list to the set of states to - // transition to from the current state. - // - *newSet |= *fFollowList[leafIndex]; - } - leafIndex = leafSorter[sorterIndex++]; - } // while (leafIndex != -1) + // If this leaf index (DFA position) is in the current set... + if (setT->getBit(leafIndex)) + { + // + // If this leaf is the current input symbol, then we + // want to add its follow list to the set of states to + // transition to from the current state. 
+ // + *newSet |= *fFollowList[leafIndex]; + } + leafIndex = leafSorter[sorterIndex++]; + } // while (leafIndex != -1) #endif - unsigned int* fLeafIndexes=leafSorter[elemIndex]; - unsigned int fNumItems=fLeafIndexes[0]; - if(fNumItems!=0) - { - // The algorithm requires finding the leaf that is present both in the bitfield of the current state, and in the - // list of places where the currently tested item can appear. When this occurs, the follow list of this parent item - // is added to the bitfield representing the next state. - // Both the bitfield and the list of places are sorted, so we can analyze them in two ways; either iterating over the - // parent items, testing the bitfield for the existence of the parent (N times a constant Tb), or by iterating over the - // bitfield (restricted to the range of the sorted list of places), using a binary search to locate the leaf in the - // sorted list of places (M times log(N) testing operations Ts) - // Assuming that the time to test a bit is roughly the same of the time needed to compute the average of two integers, - // plus a couple of comparisons and additions, we compare N agains M*log(N) to decide which algorithm should be faster given - // the two sets - if(fNumItems <= setT->getBitCountInRange(fLeafIndexes[1], fLeafIndexes[fNumItems])*log((float)fNumItems)) + unsigned int* fLeafIndexes=leafSorter[elemIndex]; + unsigned int fNumItems=fLeafIndexes[0]; + if(fNumItems!=0) { - for(unsigned int i=1; i<=fNumItems; ++i) - if(setT->getBit(fLeafIndexes[i])) - { - // - // If this leaf is the current input symbol, then we - // want to add its follow list to the set of states to - // transition to from the current state. 
- // - *newSet |= *fFollowList[ fLeafIndexes[i] ]; - } - } - else - { - // Further optimization: given that the bitfield enumerator returns the numbers in order, - // every time we raise the lower marker we know it will true also for the next bits, so - // the next binary search will not start from 1 but from this index - unsigned int lowIndex = 1; - // Start the enumerator from the first index in the sorted list of places, - // as nothing before that point will match - CMStateSetEnumerator enumBits(setT, fLeafIndexes[1]); - while(enumBits.hasMoreElements()) + // The algorithm requires finding the leaf that is present both in the bitfield of the current state, and in the + // list of places where the currently tested item can appear. When this occurs, the follow list of this parent item + // is added to the bitfield representing the next state. + // Both the bitfield and the list of places are sorted, so we can analyze them in two ways; either iterating over the + // parent items, testing the bitfield for the existence of the parent (N times a constant Tb), or by iterating over the + // bitfield (restricted to the range of the sorted list of places), using a binary search to locate the leaf in the + // sorted list of places (M times log(N) testing operations Ts) + // Assuming that the time to test a bit is roughly the same as the time needed to compute the average of two integers, + // plus a couple of comparisons and additions, we compare N against M*log(N) to decide which algorithm should be faster given + // the two sets + if(fNumItems <= setT->getBitCountInRange(fLeafIndexes[1], fLeafIndexes[fNumItems])*log((float)fNumItems)) { - unsigned int bitIndex=enumBits.nextElement(); - // if this leaf is greater than the last index in the sorted list of places, - // nothing can be found from now on, so get out of here - if(bitIndex > fLeafIndexes[fNumItems]) - break; - - // Check if this leaf index (DFA position) is in the current set - // (using binary search: the 
indexes are sorted) - unsigned int first=lowIndex,last=fNumItems,i; - while(first<=last) - { - i=(first+last)/2; - if(fLeafIndexes[i]>bitIndex) - last=i-1; - else if(fLeafIndexes[i]getBit(fLeafIndexes[i])) { // // If this leaf is the current input symbol, then we // want to add its follow list to the set of states to // transition to from the current state. // - *newSet |= *fFollowList[bitIndex]; + *newSet |= *fFollowList[ fLeafIndexes[i] ]; + } + } + else + { + // Further optimization: given that the bitfield enumerator returns the numbers in order, + // every time we raise the lower marker we know it will true also for the next bits, so + // the next binary search will not start from 1 but from this index + unsigned int lowIndex = 1; + // Start the enumerator from the first index in the sorted list of places, + // as nothing before that point will match + CMStateSetEnumerator enumBits(setT, fLeafIndexes[1]); + while(enumBits.hasMoreElements()) + { + unsigned int bitIndex=enumBits.nextElement(); + // if this leaf is greater than the last index in the sorted list of places, + // nothing can be found from now on, so get out of here + if(bitIndex > fLeafIndexes[fNumItems]) break; + + // Check if this leaf index (DFA position) is in the current set + // (using binary search: the indexes are sorted) + unsigned int first=lowIndex,last=fNumItems,i; + while(first<=last) + { + i=(first+last)/2; + if(fLeafIndexes[i]>bitIndex) + last=i-1; + else if(fLeafIndexes[i]isEmpty()) - { // - // Search the 'states to do' list to see if this new - // state set is already in there. + // If this new set is not empty, then see if its in the list + // of states to do. If not, then add it. // - /*** - unsigned int stateIndex = 0; - for (; stateIndex < curState; stateIndex++) - { - if (*statesToDo[stateIndex] == *newSet) - break; - } - ***/ - - XMLInteger *stateObj = stateTable->get(newSet); - unsigned int stateIndex = (stateObj == 0 ? 
curState : stateObj->intValue()); - - // If we did not find it, then add it - if (stateIndex == curState) + if (!newSet->isEmpty()) { // - // Put this new state into the states to do and init - // a new entry at the same index in the transition - // table. + // Search the 'states to do' list to see if this new + // state set is already in there. // - statesToDo[curState] = newSet; - fTransTable[curState] = makeDefStateList(); - stateTable->put - ( - newSet - , new (fMemoryManager) XMLInteger(curState) - ); + /*** + unsigned int stateIndex = 0; + for (; stateIndex < curState; stateIndex++) + { + if (*statesToDo[stateIndex] == *newSet) + break; + } + ***/ - // We now have a new state to do so bump the count - curState++; + XMLInteger *stateObj = stateTable->get(newSet); + unsigned int stateIndex = (stateObj == 0 ? curState : stateObj->intValue()); - // - // Null out the new set to indicate we adopted it. This - // will cause the creation of a new set on the next time - // around the loop. - // - newSet = 0; - } - - // - // Now set this state in the transition table's entry for this - // element (using its index), with the DFA state we will move - // to from the current state when we see this input element. - // - transEntry[elemIndex] = stateIndex; + // If we did not find it, then add it + if (stateIndex == curState) + { + // + // Put this new state into the states to do and init + // a new entry at the same index in the transition + // table. + // + statesToDo[curState] = newSet; + fTransTable[curState] = makeDefStateList(); + stateTable->put + ( + newSet + , new (fMemoryManager) XMLInteger(curState) + ); + + // We now have a new state to do so bump the count + curState++; + + // + // Null out the new set to indicate we adopted it. This + // will cause the creation of a new set on the next time + // around the loop. 
+ // + newSet = 0; + } - // Expand the arrays if we're full - if (curState == curArraySize) - { // - // Yikes, we overflowed the initial array size, so we've - // got to expand all of these arrays. So adjust up the - // size by 50% and allocate new arrays. + // Now set this state in the transition table's entry for this + // element (using its index), with the DFA state we will move + // to from the current state when we see this input element. // - const unsigned int newSize = (unsigned int)(curArraySize * 1.5); - CMStateSet** newToDo = (CMStateSet**) - fMemoryManager->allocate - ( - newSize * sizeof(CMStateSet*) - ); //new const CMStateSet*[newSize]; - bool* newFinalFlags = (bool*) fMemoryManager->allocate - ( - newSize * sizeof(bool) - ); //new bool[newSize]; - unsigned int** newTransTable = (unsigned int**) - fMemoryManager->allocate - ( - newSize * sizeof(unsigned int*) - ); //new unsigned int*[newSize]; + transEntry[elemIndex] = stateIndex; - // Copy over all of the existing content - for (unsigned int expIndex = 0; expIndex < curArraySize; expIndex++) + // Expand the arrays if we're full + if (curState == curArraySize) { - newToDo[expIndex] = statesToDo[expIndex]; - newFinalFlags[expIndex] = fFinalStateFlags[expIndex]; - newTransTable[expIndex] = fTransTable[expIndex]; - } - - // Clean up the old stuff - fMemoryManager->deallocate(statesToDo); //delete [] statesToDo; - fMemoryManager->deallocate(fFinalStateFlags); //delete [] fFinalStateFlags; - fMemoryManager->deallocate(fTransTable); //delete [] fTransTable; - - // Store the new array size and pointers - curArraySize = newSize; - statesToDo = newToDo; - fFinalStateFlags = newFinalFlags; - fTransTable = newTransTable; - } //if (curState == curArraySize) - } //if (!newSet->isEmpty()) - } // for elemIndex - } //while + // + // Yikes, we overflowed the initial array size, so we've + // got to expand all of these arrays. So adjust up the + // size by 50% and allocate new arrays. 
+ // + const unsigned int newSize = (unsigned int)(curArraySize * 1.5); + CMStateSet** newToDo = nullptr; + bool* newFinalFlags = nullptr; + unsigned int** newTransTable = nullptr; + try + { + newToDo = (CMStateSet**) + fMemoryManager->allocate + ( + newSize * sizeof(CMStateSet*) + ); //new const CMStateSet*[newSize]; + newFinalFlags = (bool*) fMemoryManager->allocate + ( + newSize * sizeof(bool) + ); //new bool[newSize]; + newTransTable = (unsigned int**) + fMemoryManager->allocate + ( + newSize * sizeof(unsigned int*) + ); //new unsigned int*[newSize]; + + // Copy over all of the existing content + for (unsigned int expIndex = 0; expIndex < curArraySize; expIndex++) + { + newToDo[expIndex] = statesToDo[expIndex]; + newFinalFlags[expIndex] = fFinalStateFlags[expIndex]; + newTransTable[expIndex] = fTransTable[expIndex]; + } + } + catch( const OutOfMemoryException& e ) + { + fMemoryManager->deallocate(newToDo); + fMemoryManager->deallocate(newFinalFlags); + fMemoryManager->deallocate(newTransTable); + throw; - // Store the current state count in the trans table size - fTransTableSize = curState; + } - // - // Fill in the occurence information for each looping state - // if we're using counters. 
- // - if (elemOccurenceMap != 0) { - fCountingStates = (Occurence**)fMemoryManager->allocate(fTransTableSize*sizeof(Occurence*)); - memset(fCountingStates, 0, fTransTableSize*sizeof(Occurence*)); - for (unsigned int i = 0; i < fTransTableSize; ++i) { - unsigned int * transitions = fTransTable[i]; - for (unsigned int j = 0; j < fElemMapSize; ++j) { - if (i == transitions[j]) { - Occurence* old=elemOccurenceMap[j]; - if(old!=0) - fCountingStates[i] = new (fMemoryManager) Occurence(old->minOccurs, old->maxOccurs, old->elemIndex); - break; - } - } - } - for (unsigned int j = 0; j < fLeafCount; ++j) { - if(elemOccurenceMap[j]!=0) - delete elemOccurenceMap[j]; - } - fMemoryManager->deallocate(elemOccurenceMap); + // Clean up the old stuff + fMemoryManager->deallocate(statesToDo); //delete [] statesToDo; + fMemoryManager->deallocate(fFinalStateFlags); //delete [] fFinalStateFlags; + fMemoryManager->deallocate(fTransTable); //delete [] fTransTable; + + // Store the new array size and pointers + curArraySize = newSize; + statesToDo = newToDo; + fFinalStateFlags = newFinalFlags; + fTransTable = newTransTable; + } //if (curState == curArraySize) + } //if (!newSet->isEmpty()) + } // for elemIndex + } //while + } + catch( const OutOfMemoryException& e ) + { + finalizeProcessingAndCleanup(); + throw; } - // If the last temp set was not stored, then clean it up - if (newSet) - delete newSet; - - // - // Now we can clean up all of the temporary data that was needed during - // DFA build. - // - - for (index = 0; index < fLeafCount; index++) - delete fFollowList[index]; - fMemoryManager->deallocate(fFollowList); //delete [] fFollowList; - fFollowList = NULL; - - // - // removeAll() will delete all data, XMLInteger, - // while the keys are to be deleted by the - // deletion of statesToDo. 
- // - delete stateTable; - - for (index = 0; index < curState; index++) - delete statesToDo[index]; - fMemoryManager->deallocate(statesToDo); //delete [] statesToDo; - - for (index = 0; index < fLeafCount; index++) - delete fLeafList[index]; - fMemoryManager->deallocate(fLeafList); //delete [] fLeafList; - fLeafList = NULL; - -#ifdef OPTIMIZED_BUT_STILL_LINEAR_SEARCH - fMemoryManager->deallocate(leafSorter); //delete [] leafSorter; -#endif - for (index=0; index < fElemMapSize; index++) - fMemoryManager->deallocate(leafSorter[index]); - fMemoryManager->deallocate(leafSorter); + finalizeProcessingAndCleanup(); } unsigned int DFAContentModel::countLeafNodes(ContentSpecNode* const curNode)