From 8d103c15637d74a63a186fd9fd6ed6747ce4494f Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Tue, 3 Dec 2024 08:26:30 -0800 Subject: [PATCH 1/7] minor fix and reordering --- ...ication_predict_dense_default_batch_impl.i | 23 +- ..._classification_train_dense_default_impl.i | 206 +++++++++++------- 2 files changed, 144 insertions(+), 85 deletions(-) mode change 100755 => 100644 cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i old mode 100755 new mode 100644 index 82cb20faaed..b6d987c2cd3 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -39,6 +39,7 @@ #include "src/algorithms/k_nearest_neighbors/kdtree_knn_classification_model_impl.h" #include "src/algorithms/k_nearest_neighbors/kdtree_knn_impl.i" #include "src/algorithms/k_nearest_neighbors/knn_heap.h" +#include namespace daal { @@ -145,11 +146,27 @@ Status KNNClassificationPredictKernel::compu if (par3 == NULL) return Status(ErrorNullParameterNotSupported); - const Model * const model = static_cast(m); - const auto & kdTreeTable = *(model->impl()->getKDTreeTable()); + const Model * const model = static_cast(m); + const KDTreeTable & kdTreeTable = *(model->impl()->getKDTreeTable()); + const KDTreeNode * const nodes = static_cast(kdTreeTable.getArray()); + const size_t xRowCount = x->getNumberOfRows(); + + const algorithmFpType base = 2.0; + const algorithmFpType baseInPower = Math::sPowx(base, Math::sCeil(Math::sLog(base * xRowCount - 1) / Math::sLog(base))); + DAAL_ASSERT(baseInPower > 0) + const size_t maxKDTreeNodeCount = ((size_t)baseInPower * __KDTREE_MAX_NODE_COUNT_MULTIPLICATION_FACTOR) / __KDTREE_LEAF_BUCKET_SIZE + 1; + for (int index = 0; index < maxKDTreeNodeCount; index++) + { + const KDTreeNode & node = nodes[index]; + + std::cout << "Node Index: " << index << ", Dimension: " << node.dimension << ", Cut Point: " << node.cutPoint + << ", Left Index: " << node.leftIndex << ", Right Index: " << node.rightIndex << std::endl; + } + const auto rootTreeNodeIndex = model->impl()->getRootNodeIndex(); const NumericTable & data = *(model->impl()->getData()); const NumericTable * labels = nullptr; + if (resultsToEvaluate != 0) { labels = model->impl()->getLabels().get(); @@ -164,8 +181,6 @@ Status KNNClassificationPredictKernel::compu } const size_t heapSize = (iSize / 16 + 1) * 16; - const size_t xRowCount = x->getNumberOfRows(); - const algorithmFpType base = 2.0; const size_t expectedMaxDepth = (Math::sLog(xRowCount) / Math::sLog(base) + 1) * __KDTREE_DEPTH_MULTIPLICATION_FACTOR; const size_t stackSize = Math::sPowx(base, Math::sCeil(Math::sLog(expectedMaxDepth) / Math::sLog(base))); struct Local diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i index da678fac69e..1a2a0f5359a 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i @@ -61,39 +61,70 @@ using namespace kdtree_knn_classification::internal; template class Queue { + static const size_t defaultSize = 4; + public: - Queue() : _data(nullptr) {} + Queue() : _data(nullptr), _first(0), _last(0), _count(0), _size(0), _capacity(0) {} - ~Queue() - { - services::daal_free(_data); - _data = nullptr; - } + ~Queue() { clear(); } + + Queue(const Queue &) = delete; + Queue & operator=(const Queue &) = delete; bool init(size_t size) { clear(); + if (size == 0) // Check for valid size + { + return false; + } + _first = _count = 0; _last = _sizeMinus1 = (_size = size) - 1; - return ((_data = static_cast(service_malloc(size * sizeof(T)))) != nullptr); + _data = static_cast(service_malloc(size)); + + if (!_data) // Check if memory allocation was successful + { + return false; + } + + _capacity = _size; // Initialize capacity + return true; } void clear() { - daal_free(_data); - _data = nullptr; + if (_data) + { + daal::services::internal::service_free(_data); // Free allocated memory if it exists + _data = nullptr; + } + _first = _last = _count = _size = _sizeMinus1 = _capacity = 0; // Reset state } + void reset() { _first = _last = _count = 0; } + DAAL_FORCEINLINE void push(const T & value) { - _data[_last = (_last + 1) & _sizeMinus1] = value; + if (_count >= _capacity) // Check if capacity is exceeded + { + services::Status status = grow(); // Grow if necessary + //DAAL_CHECK_STATUS_VAR(status); + } + + _data[_last = (_last + 1) & _sizeMinus1] = value; // Add element to queue ++_count; } DAAL_FORCEINLINE T pop() { - const T value = _data[_first++]; - _first *= (_first != _size); + // if (empty()) // Check if queue is empty + // { + // throw std::underflow_error("Queue underflow: no elements to pop."); + // } + + const T value = _data[_first++]; // Retrieve element + _first *= (_first != _size); // Reset first index if it reaches the end --_count; return value; } @@ -103,12 +134,34 @@ public: size_t size() const { return _count; } private: + services::Status grow() + { + int result = 0; + _capacity = (_capacity == 0 ? defaultSize : _capacity * 2); // Double capacity or set to default + + T * const newData = daal::services::internal::service_malloc(_capacity); + DAAL_CHECK_MALLOC(newData); + + if (_data != nullptr) + { + result = services::internal::daal_memcpy_s(newData, _last * sizeof(T), _data, _last * sizeof(T)); + daal::services::internal::service_free(_data); // Free old data + _data = nullptr; + } + + _data = newData; // Assign new expanded memory + _size = _capacity; // Adjust size to new capacity + _sizeMinus1 = _capacity - 1; // Update size minus 1 for wrapping + return (!result) ? services::Status() : services::Status(services::ErrorMemoryCopyFailedInternal); + } + T * _data; - size_t _first; - size_t _last; - size_t _count; - size_t _size; - size_t _sizeMinus1; + size_t _first; // Index of the first element + size_t _last; // Index of the last element + size_t _count; // Current number of elements + size_t _size; // Current size of the queue + size_t _sizeMinus1; // Helper for wrap-around logic + size_t _capacity; // Maximum capacity of the queue }; struct BuildNode @@ -159,20 +212,16 @@ Status KNNClassificationTrainBatchKernel *>(r->impl()->getIndices().get())->getArray(); Queue q; - BBox * bboxQ = nullptr; - auto oldThreads = services::Environment::getInstance()->getNumberOfThreads(); + BBox * bboxQ = nullptr; DAAL_CHECK_STATUS(status, buildFirstPartOfKDTree(q, bboxQ, *x, *r, indexes, engine)); - // Temporary workaround for threading issues in `buildSecondPartOfKDTree()` - // Fix to be provided in https://github.com/oneapi-src/oneDAL/pull/2925 - services::Environment::getInstance()->setNumberOfThreads(1); + DAAL_CHECK_STATUS(status, buildSecondPartOfKDTree(q, bboxQ, *x, *r, indexes, engine)); - services::Environment::getInstance()->setNumberOfThreads(oldThreads); + DAAL_CHECK_STATUS(status, rearrangePoints(*x, indexes)); if (y) { DAAL_CHECK_STATUS(status, rearrangePoints(*y, indexes)); } - daal_free(bboxQ); bboxQ = nullptr; return status; @@ -189,9 +238,7 @@ Status KNNClassificationTrainBatchKernel BBox; const algorithmFpType base = 2.0; - // The queue size is not impacted by number of threads. - // All operations with the queue are done not in the threader_for primitives. - const size_t queueSize = 2 * Math::sPowx(base, Math::sCeil(Math::sLog(__KDTREE_FIRST_PART_LEAF_NODES_PER_THREAD) / Math::sLog(base))); + const size_t queueSize = 2 * Math::sPowx(base, Math::sCeil(Math::sLog(__KDTREE_FIRST_PART_LEAF_NODES_PER_THREAD) / Math::sLog(base))); const size_t firstPartLeafNodeCount = queueSize / 2; q.init(queueSize); const size_t xColumnCount = x.getNumberOfColumns(); @@ -201,7 +248,7 @@ Status KNNClassificationTrainBatchKernel(service_malloc(bboxSize * sizeof(BBox), sizeof(BBox))); + bboxQ = static_cast(service_malloc(bboxSize)); DAAL_CHECK_MALLOC(bboxQ) r.impl()->setLastNodeIndex(0); @@ -226,7 +273,7 @@ Status KNNClassificationTrainBatchKernel(service_malloc(subSampleCount * sizeof(algorithmFpType))); + algorithmFpType * subSamples = static_cast(service_malloc(subSampleCount)); DAAL_CHECK_MALLOC(subSamples) while (maxNodeCountForCurrentDepth < firstPartLeafNodeCount) @@ -320,8 +367,8 @@ Status KNNClassificationTrainBatchKernel(x).getBlockOfColumnValues(j, 0, xRowCount, readOnly, columnBD); const algorithmFpType * const dx = columnBD.getBlockPtr(); - - daal::tls bboxTLS([=, &status]() -> BBox * { + SafeStatus safeStat; + daal::tls bboxTLS([&]() -> BBox * { BBox * const ptr = service_scalable_calloc(1); if (ptr) { @@ -330,51 +377,52 @@ Status KNNClassificationTrainBatchKernel(static_cast(first + rowsPerBlock), xRowCount); + DAAL_CHECK_MALLOC_THR(bboxLocal); + const size_t first = iBlock * rowsPerBlock; + const size_t last = min(static_cast(first + rowsPerBlock), xRowCount); - if (first < last) + if (first < last) + { + BBox b; + size_t i = first; + b.upper = dx[indexes[i]]; + b.lower = dx[indexes[i]]; + PRAGMA_IVDEP + for (++i; i < last; ++i) { - BBox b; - size_t i = first; - b.upper = dx[indexes[i]]; - b.lower = dx[indexes[i]]; - PRAGMA_IVDEP - for (++i; i < last; ++i) + if (b.lower > dx[indexes[i]]) { - if (b.lower > dx[indexes[i]]) - { - b.lower = dx[indexes[i]]; - } - if (b.upper < dx[indexes[i]]) - { - b.upper = dx[indexes[i]]; - } - } - - if (bboxLocal->upper < b.upper) - { - bboxLocal->upper = b.upper; + b.lower = dx[indexes[i]]; } - if (bboxLocal->lower > b.lower) + if (b.upper < dx[indexes[i]]) { - bboxLocal->lower = b.lower; + b.upper = dx[indexes[i]]; } } + + if (bboxLocal->upper < b.upper) + { + bboxLocal->upper = b.upper; + } + if (bboxLocal->lower > b.lower) + { + bboxLocal->lower = b.lower; + } } }); + status = safeStat.detach(); + if (!status) return status; + bboxTLS.reduce([=](BBox * v) -> void { if (v) { @@ -718,8 +766,8 @@ size_t KNNClassificationTrainBatchKernel(service_malloc(idxMultiplier * (blockCount + 1) * sizeof(size_t))); - size_t * rightSegmentStartPerBlock = static_cast(service_malloc(idxMultiplier * blockCount * sizeof(size_t))); + size_t * leftSegmentStartPerBlock = static_cast(service_malloc(idxMultiplier * (blockCount + 1))); + size_t * rightSegmentStartPerBlock = static_cast(service_malloc(idxMultiplier * blockCount)); if (!leftSegmentStartPerBlock || !rightSegmentStartPerBlock) { @@ -848,10 +896,7 @@ Status KNNClassificationTrainBatchKernel(service_malloc(xRowCount * sizeof(algorithmFpType))))); + (rx != wx) ? wx : (buffer ? buffer : (buffer = static_cast(service_malloc(xRowCount)))); if (!awx) { status.add(services::ErrorMemoryAllocationFailed); @@ -936,7 +981,7 @@ Status KNNClassificationTrainBatchKernel(service_malloc(q.size() * sizeof(BuildNode))); + BuildNode * bnQ = static_cast(service_malloc(q.size())); DAAL_CHECK_MALLOC(bnQ) size_t posQ = 0; while (q.size() > 0) @@ -974,7 +1019,7 @@ Status KNNClassificationTrainBatchKernel(service_malloc((maxThreads + 1) * sizeof(*firstNodeIndex))); + size_t * firstNodeIndex = static_cast(service_malloc((maxThreads + 1))); DAAL_CHECK_MALLOC(firstNodeIndex) size_t nodeIndex = lastNodeIndex; for (size_t i = 0; i < maxThreads; ++i) @@ -993,7 +1038,7 @@ Status KNNClassificationTrainBatchKernelbboxes = service_scalable_calloc(ptr->bboxesCapacity * xColumnCount)) != nullptr) && ((ptr->inSortValues = service_scalable_calloc(__KDTREE_INDEX_VALUE_PAIRS_PER_THREAD)) != nullptr) && ((ptr->outSortValues = service_scalable_calloc(__KDTREE_INDEX_VALUE_PAIRS_PER_THREAD)) != nullptr) - && ((ptr->fixupQueue = static_cast(service_malloc(ptr->fixupQueueCapacity * sizeof(size_t)))) != nullptr) + && ((ptr->fixupQueue = static_cast(service_malloc(ptr->fixupQueueCapacity))) != nullptr) && ptr->buildStack.init(stackSize))) { status.add(services::ErrorMemoryAllocationFailed); @@ -1021,7 +1066,8 @@ Status KNNClassificationTrainBatchKernelfixupQueueIndex >= local->fixupQueueCapacity) { const size_t newCapacity = local->fixupQueueCapacity * 2; - size_t * const newQueue = static_cast(service_malloc(newCapacity * sizeof(size_t))); + size_t * const newQueue = static_cast(service_malloc(newCapacity)); DAAL_CHECK_THR(newQueue, services::ErrorMemoryAllocationFailed); result |= daal::services::internal::daal_memcpy_s(newQueue, newCapacity * sizeof(size_t), local->fixupQueue, local->fixupQueueIndex * sizeof(size_t)); @@ -1130,14 +1176,12 @@ Status KNNClassificationTrainBatchKernel( local->extraKDTreeNodesCapacity > 0 ? local->extraKDTreeNodesCapacity * 2 : static_cast(1024), extraIndex + 1); - KDTreeNode * const newNodes = - static_cast(service_malloc(newCapacity * sizeof(KDTreeNode))); + KDTreeNode * const newNodes = static_cast(service_malloc(newCapacity)); DAAL_CHECK_THR(newNodes, services::ErrorMemoryAllocationFailed); - result |= daal::services::internal::daal_memcpy_s(newNodes, newCapacity * sizeof(KDTreeNode), - local->extraKDTreeNodes, - local->extraKDTreeNodesCapacity * sizeof(KDTreeNode)); + result |= daal::services::internal::daal_memcpy_s(newNodes, newCapacity, local->extraKDTreeNodes, + local->extraKDTreeNodesCapacity); KDTreeNode * oldNodes = local->extraKDTreeNodes; local->extraKDTreeNodes = newNodes; local->extraKDTreeNodesCapacity = newCapacity; @@ -1148,8 +1192,8 @@ Status KNNClassificationTrainBatchKernelextraKDTreeNodesCapacity = max(extraIndex + 1, static_cast(1024)); - local->extraKDTreeNodes = static_cast( - service_malloc(local->extraKDTreeNodesCapacity * sizeof(KDTreeNode))); + local->extraKDTreeNodes = + static_cast(service_malloc(local->extraKDTreeNodesCapacity)); DAAL_CHECK_THR(local->extraKDTreeNodes, services::ErrorMemoryAllocationFailed); } @@ -1203,7 +1247,7 @@ Status KNNClassificationTrainBatchKernel Status { int result = 0; - bool isNeedToReindex = false; + bool isNeedToReindex = true; localTLS.reduce([=, &isNeedToReindex](Local * ptr) -> void { if (ptr && ptr->extraKDTreeNodes) { @@ -1360,7 +1404,7 @@ algorithmFpType KNNClassificationTrainBatchKernel(service_malloc(sampleCount * sizeof(*samples))); + algorithmFpType * samples = static_cast(service_malloc(sampleCount)); if (!samples) { status = services::ErrorMemoryAllocationFailed; @@ -1385,7 +1429,7 @@ algorithmFpType KNNClassificationTrainBatchKernel(sampleCount, samples); - size_t * hist = static_cast(service_malloc(sampleCount * sizeof(*hist))); + size_t * hist = static_cast(service_malloc(sampleCount)); if (!hist) { status = services::ErrorMemoryAllocationFailed; @@ -1398,7 +1442,7 @@ algorithmFpType KNNClassificationTrainBatchKernel(service_malloc(subSampleCount * sizeof(*subSamples))); + algorithmFpType * subSamples = static_cast(service_malloc(subSampleCount)); if (!subSamples) { status = services::ErrorMemoryAllocationFailed; From 37f9ecd620be207c2f979ea062bc3b72e6cecdde Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Wed, 4 Dec 2024 22:30:46 -0800 Subject: [PATCH 2/7] minor fix --- ...ication_predict_dense_default_batch_impl.i | 13 +- ..._classification_train_dense_default_impl.i | 114 ++++++++---------- 2 files changed, 53 insertions(+), 74 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index b6d987c2cd3..f72b03b4168 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -39,7 +39,6 @@ #include "src/algorithms/k_nearest_neighbors/kdtree_knn_classification_model_impl.h" #include "src/algorithms/k_nearest_neighbors/kdtree_knn_impl.i" #include "src/algorithms/k_nearest_neighbors/knn_heap.h" -#include namespace daal { @@ -151,17 +150,7 @@ Status KNNClassificationPredictKernel::compu const KDTreeNode * const nodes = static_cast(kdTreeTable.getArray()); const size_t xRowCount = x->getNumberOfRows(); - const algorithmFpType base = 2.0; - const algorithmFpType baseInPower = Math::sPowx(base, Math::sCeil(Math::sLog(base * xRowCount - 1) / Math::sLog(base))); - DAAL_ASSERT(baseInPower > 0) - const size_t maxKDTreeNodeCount = ((size_t)baseInPower * __KDTREE_MAX_NODE_COUNT_MULTIPLICATION_FACTOR) / __KDTREE_LEAF_BUCKET_SIZE + 1; - for (int index = 0; index < maxKDTreeNodeCount; index++) - { - const KDTreeNode & node = nodes[index]; - - std::cout << "Node Index: " << index << ", Dimension: " << node.dimension << ", Cut Point: " << node.cutPoint - << ", Left Index: " << node.leftIndex << ", Right Index: " << node.rightIndex << std::endl; - } + const algorithmFpType base = 2.0; const auto rootTreeNodeIndex = model->impl()->getRootNodeIndex(); const NumericTable & data = *(model->impl()->getData()); diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i index 1a2a0f5359a..2745a16bc0f 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i @@ -368,7 +368,7 @@ Status KNNClassificationTrainBatchKernel(x).getBlockOfColumnValues(j, 0, xRowCount, readOnly, columnBD); const algorithmFpType * const dx = columnBD.getBlockPtr(); SafeStatus safeStat; - daal::tls bboxTLS([&]() -> BBox * { + daal::tls bboxTLS([&safeStat]() -> BBox * { BBox * const ptr = service_scalable_calloc(1); if (ptr) { @@ -1246,79 +1246,69 @@ Status KNNClassificationTrainBatchKernel Status { - int result = 0; - bool isNeedToReindex = true; - localTLS.reduce([=, &isNeedToReindex](Local * ptr) -> void { - if (ptr && ptr->extraKDTreeNodes) + int result = 0; + + size_t actualNodeCount = lastNodeIndex; + localTLS.reduce([=, &actualNodeCount](Local * ptr) -> void { + if (ptr) { - isNeedToReindex = true; + actualNodeCount += ptr->nodeIndex - firstNodeIndex[ptr->threadIndex]; } }); - if (isNeedToReindex) - { - size_t actualNodeCount = lastNodeIndex; - localTLS.reduce([=, &actualNodeCount](Local * ptr) -> void { - if (ptr) - { - actualNodeCount += ptr->nodeIndex - firstNodeIndex[ptr->threadIndex]; - } - }); - - Status s; - KDTreeTablePtr newKDTreeTable(new KDTreeTable(actualNodeCount, s)); - DAAL_CHECK_STATUS_VAR(s); - KDTreeNode * const oldRoot = static_cast(kdTreeTable.getArray()); - KDTreeNode * const newRoot = static_cast(newKDTreeTable->getArray()); + Status s; + KDTreeTablePtr newKDTreeTable(new KDTreeTable(actualNodeCount, s)); + DAAL_CHECK_STATUS_VAR(s); + KDTreeNode * const oldRoot = static_cast(kdTreeTable.getArray()); + KDTreeNode * const newRoot = static_cast(newKDTreeTable->getArray()); - result |= daal::services::internal::daal_memcpy_s(newRoot, actualNodeCount * sizeof(KDTreeNode), oldRoot, - lastNodeIndex * sizeof(KDTreeNode)); + result |= + daal::services::internal::daal_memcpy_s(newRoot, actualNodeCount * sizeof(KDTreeNode), oldRoot, lastNodeIndex * sizeof(KDTreeNode)); - size_t newNodeIndex = lastNodeIndex; - localTLS.reduce([=, &result, &newNodeIndex](Local * ptr) -> void { - if (ptr) + size_t newNodeIndex = lastNodeIndex; + localTLS.reduce([=, &result, &newNodeIndex](Local * ptr) -> void { + if (ptr) + { + const size_t oldNodeIndex = firstNodeIndex[ptr->threadIndex]; + if (ptr->nodeIndex != oldNodeIndex) { - const size_t oldNodeIndex = firstNodeIndex[ptr->threadIndex]; - if (ptr->nodeIndex != oldNodeIndex) + const size_t extraNodeIndex = firstNodeIndex[ptr->threadIndex + 1]; + if (ptr->nodeIndex > extraNodeIndex) { - const size_t extraNodeIndex = firstNodeIndex[ptr->threadIndex + 1]; - if (ptr->nodeIndex > extraNodeIndex) - { - result |= daal::services::internal::daal_memcpy_s( - &newRoot[newNodeIndex], (actualNodeCount - newNodeIndex) * sizeof(KDTreeNode), &oldRoot[oldNodeIndex], - (extraNodeIndex - oldNodeIndex) * sizeof(KDTreeNode)); - const size_t idx = newNodeIndex + (extraNodeIndex - oldNodeIndex); - result |= daal::services::internal::daal_memcpy_s(&newRoot[idx], (actualNodeCount - idx) * sizeof(KDTreeNode), - ptr->extraKDTreeNodes, - (ptr->nodeIndex - extraNodeIndex) * sizeof(KDTreeNode)); - } - else - { - result |= daal::services::internal::daal_memcpy_s( - &newRoot[newNodeIndex], (actualNodeCount - newNodeIndex) * sizeof(KDTreeNode), &oldRoot[oldNodeIndex], - (ptr->nodeIndex - oldNodeIndex) * sizeof(KDTreeNode)); - } - const long delta = newNodeIndex - oldNodeIndex; - for (size_t i = 0; i < ptr->fixupQueueIndex; ++i) - { - newRoot[ptr->fixupQueue[i]].leftIndex += delta; - newRoot[ptr->fixupQueue[i]].rightIndex += delta; - } - for (size_t i = newNodeIndex, end = newNodeIndex + ptr->nodeIndex - oldNodeIndex; i < end; ++i) + result |= + daal::services::internal::daal_memcpy_s(&newRoot[newNodeIndex], (actualNodeCount - newNodeIndex) * sizeof(KDTreeNode), + &oldRoot[oldNodeIndex], (extraNodeIndex - oldNodeIndex) * sizeof(KDTreeNode)); + const size_t idx = newNodeIndex + (extraNodeIndex - oldNodeIndex); + result |= daal::services::internal::daal_memcpy_s(&newRoot[idx], (actualNodeCount - idx) * sizeof(KDTreeNode), + ptr->extraKDTreeNodes, + (ptr->nodeIndex - extraNodeIndex) * sizeof(KDTreeNode)); + } + else + { + result |= + daal::services::internal::daal_memcpy_s(&newRoot[newNodeIndex], (actualNodeCount - newNodeIndex) * sizeof(KDTreeNode), + &oldRoot[oldNodeIndex], (ptr->nodeIndex - oldNodeIndex) * sizeof(KDTreeNode)); + } + const long delta = newNodeIndex - oldNodeIndex; + for (size_t i = 0; i < ptr->fixupQueueIndex; ++i) + { + newRoot[ptr->fixupQueue[i]].leftIndex += delta; + newRoot[ptr->fixupQueue[i]].rightIndex += delta; + } + for (size_t i = newNodeIndex, end = newNodeIndex + ptr->nodeIndex - oldNodeIndex; i < end; ++i) + { + if (newRoot[i].dimension != __KDTREE_NULLDIMENSION) { - if (newRoot[i].dimension != __KDTREE_NULLDIMENSION) - { - newRoot[i].leftIndex += delta; - newRoot[i].rightIndex += delta; - } + newRoot[i].leftIndex += delta; + newRoot[i].rightIndex += delta; } - newNodeIndex += ptr->nodeIndex - oldNodeIndex; } + newNodeIndex += ptr->nodeIndex - oldNodeIndex; } - }); - r.impl()->setKDTreeTable(newKDTreeTable); - r.impl()->setLastNodeIndex(newNodeIndex); - } + } + }); + r.impl()->setKDTreeTable(newKDTreeTable); + r.impl()->setLastNodeIndex(newNodeIndex); return (!result) ? Status() : Status(ErrorMemoryCopyFailedInternal); }(); From a9432701dc4faa00c5f781b15d3d0d856994dff2 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Wed, 4 Dec 2024 23:14:07 -0800 Subject: [PATCH 3/7] remove unused --- ...kdtree_knn_classification_predict_dense_default_batch_impl.i | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index f72b03b4168..86852bfd26d 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -147,9 +147,7 @@ Status KNNClassificationPredictKernel::compu const Model * const model = static_cast(m); const KDTreeTable & kdTreeTable = *(model->impl()->getKDTreeTable()); - const KDTreeNode * const nodes = static_cast(kdTreeTable.getArray()); const size_t xRowCount = x->getNumberOfRows(); - const algorithmFpType base = 2.0; const auto rootTreeNodeIndex = model->impl()->getRootNodeIndex(); From 696b5c42059f66933b78909716e187326b39acd1 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Tue, 10 Dec 2024 07:19:29 -0800 Subject: [PATCH 4/7] minor fixes --- ...kdtree_knn_classification_predict_dense_default_batch_impl.i | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 86852bfd26d..258d742fd19 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -148,7 +148,7 @@ Status KNNClassificationPredictKernel::compu const Model * const model = static_cast(m); const KDTreeTable & kdTreeTable = *(model->impl()->getKDTreeTable()); const size_t xRowCount = x->getNumberOfRows(); - const algorithmFpType base = 2.0; + const algorithmFpType base = 2.0; const auto rootTreeNodeIndex = model->impl()->getRootNodeIndex(); const NumericTable & data = *(model->impl()->getData()); From a5e7fd783ab482b860b5cf516a51488f7c00976e Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Mon, 16 Dec 2024 01:25:23 -0800 Subject: [PATCH 5/7] minor fixes --- ...ication_predict_dense_default_batch_impl.i | 12 ++- ..._classification_train_dense_default_impl.i | 73 ++++++++----------- 2 files changed, 35 insertions(+), 50 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i index 258d742fd19..5e2ef3e30aa 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -147,13 +147,9 @@ Status KNNClassificationPredictKernel::compu const Model * const model = static_cast(m); const KDTreeTable & kdTreeTable = *(model->impl()->getKDTreeTable()); - const size_t xRowCount = x->getNumberOfRows(); - const algorithmFpType base = 2.0; - - const auto rootTreeNodeIndex = model->impl()->getRootNodeIndex(); - const NumericTable & data = *(model->impl()->getData()); - const NumericTable * labels = nullptr; - + const auto rootTreeNodeIndex = model->impl()->getRootNodeIndex(); + const NumericTable & data = *(model->impl()->getData()); + const NumericTable * labels = nullptr; if (resultsToEvaluate != 0) { labels = model->impl()->getLabels().get(); @@ -168,6 +164,8 @@ Status KNNClassificationPredictKernel::compu } const size_t heapSize = (iSize / 16 + 1) * 16; + const size_t xRowCount = x->getNumberOfRows(); + const algorithmFpType base = 2.0; const size_t expectedMaxDepth = (Math::sLog(xRowCount) / Math::sLog(base) + 1) * __KDTREE_DEPTH_MULTIPLICATION_FACTOR; const size_t stackSize = Math::sPowx(base, Math::sCeil(Math::sLog(expectedMaxDepth) / Math::sLog(base))); struct Local diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i index dbb7cd3273a..1b0b992fc9e 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i @@ -64,7 +64,7 @@ class Queue static const size_t defaultSize = 4; public: - Queue() : _data(nullptr), _first(0), _last(0), _count(0), _size(0), _capacity(0) {} + Queue() : _data(nullptr), _first(0), _last(0), _count(0), _capacity(0) {} ~Queue() { clear(); } @@ -74,21 +74,19 @@ public: bool init(size_t size) { clear(); - if (size == 0) // Check for valid size + if (size == 0) { return false; } - _first = _count = 0; - _last = _sizeMinus1 = (_size = size) - 1; - _data = static_cast(service_malloc(size)); - - if (!_data) // Check if memory allocation was successful + _data = static_cast(service_malloc(size)); + if (!_data) { return false; } - _capacity = _size; // Initialize capacity + _capacity = size; + _first = _last = _count = 0; return true; } @@ -96,35 +94,30 @@ public: { if (_data) { - daal::services::internal::service_free(_data); // Free allocated memory if it exists + daal::services::internal::service_free(_data); _data = nullptr; } - _first = _last = _count = _size = _sizeMinus1 = _capacity = 0; // Reset state + _first = _last = _count = _capacity = 0; } void reset() { _first = _last = _count = 0; } - DAAL_FORCEINLINE void push(const T & value) + void push(const T & value) { - if (_count >= _capacity) // Check if capacity is exceeded + if (_count >= _capacity) { - services::Status status = grow(); // Grow if necessary - //DAAL_CHECK_STATUS_VAR(status); + grow(); } - _data[_last = (_last + 1) & _sizeMinus1] = value; // Add element to queue + _data[_last] = value; + ++_last; ++_count; } - DAAL_FORCEINLINE T pop() + T pop() { - // if (empty()) // Check if queue is empty - // { - // throw std::underflow_error("Queue underflow: no elements to pop."); - // } - - const T value = _data[_first++]; // Retrieve element - _first *= (_first != _size); // Reset first index if it reaches the end + T value = _data[_first]; + _first = (_first + 1) % _capacity; --_count; return value; } @@ -134,34 +127,28 @@ public: size_t size() const { return _count; } private: - services::Status grow() + void grow() { - int result = 0; - _capacity = (_capacity == 0 ? defaultSize : _capacity * 2); // Double capacity or set to default + size_t newCapacity = (_capacity == 0 ? defaultSize : _capacity * 2); + T * newData = static_cast(service_malloc(newCapacity)); - T * const newData = daal::services::internal::service_malloc(_capacity); - DAAL_CHECK_MALLOC(newData); - - if (_data != nullptr) + for (size_t i = 0; i < _count; ++i) { - result = services::internal::daal_memcpy_s(newData, _last * sizeof(T), _data, _last * sizeof(T)); - daal::services::internal::service_free(_data); // Free old data - _data = nullptr; + newData[i] = _data[(_first + i) % _capacity]; } - _data = newData; // Assign new expanded memory - _size = _capacity; // Adjust size to new capacity - _sizeMinus1 = _capacity - 1; // Update size minus 1 for wrapping - return (!result) ? services::Status() : services::Status(services::ErrorMemoryCopyFailedInternal); + daal::services::internal::service_free(_data); + _data = newData; + _capacity = newCapacity; + _first = 0; + _last = _count; } T * _data; - size_t _first; // Index of the first element - size_t _last; // Index of the last element - size_t _count; // Current number of elements - size_t _size; // Current size of the queue - size_t _sizeMinus1; // Helper for wrap-around logic - size_t _capacity; // Maximum capacity of the queue + size_t _first; // Index of the first element + size_t _last; // Index of the next position to insert + size_t _count; // Current number of elements + size_t _capacity; // Maximum capacity of the queue }; struct BuildNode From 343617997ce7f8592320153838fa7e499c194f82 Mon Sep 17 00:00:00 2001 From: Alexandr-Solovev Date: Tue, 17 Dec 2024 07:09:55 -0800 Subject: [PATCH 6/7] minor fixes --- .../kdtree_knn_classification_train_dense_default_impl.i | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i index 1b0b992fc9e..ba4c93ec772 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i @@ -61,6 +61,9 @@ using namespace kdtree_knn_classification::internal; template class Queue { + // Default size of the queue. This value is suitable for small allocations + // during construction, but the primary use case involves calling the init(size) + // function. In most cases, init(size) will be called. static const size_t defaultSize = 4; public: @@ -368,7 +371,7 @@ Status KNNClassificationTrainBatchKernel Date: Wed, 18 Dec 2024 04:04:36 -0800 Subject: [PATCH 7/7] fixes --- ..._classification_train_dense_default_impl.i | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i index ba4c93ec772..53b3c8ff9b9 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i @@ -130,21 +130,23 @@ public: size_t size() const { return _count; } private: - void grow() + Status grow() { - size_t newCapacity = (_capacity == 0 ? defaultSize : _capacity * 2); - T * newData = static_cast(service_malloc(newCapacity)); + int result = 0; + _capacity = (_capacity == 0 ? defaultSize : _capacity * 2); + T * newData = static_cast(service_malloc(_capacity)); + DAAL_CHECK_MALLOC(newData); - for (size_t i = 0; i < _count; ++i) + if (_data != nullptr) { - newData[i] = _data[(_first + i) % _capacity]; + result = services::internal::daal_memcpy_s(newData, _last * sizeof(T), _data, _last * sizeof(T)); + daal::services::internal::service_free(_data); + _data = nullptr; } - daal::services::internal::service_free(_data); - _data = newData; - _capacity = newCapacity; - _first = 0; - _last = _count; + _data = newData; + + return (!result) ? Status() : Status(services::ErrorMemoryCopyFailedInternal); } T * _data;