From 15bc8011f45facc071368271f18699f8dd11ae24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=9B=A7=E5=9B=A7?= Date: Wed, 28 Feb 2024 01:07:26 +0800 Subject: [PATCH] Revert "Replace std::hash with our custom hash function" --- src/include/function/hash/hash_functions.h | 36 ++++--------------- .../operator/persistent/index_builder.h | 6 ++-- src/include/storage/index/hash_index.h | 2 +- .../storage/index/hash_index_builder.h | 13 ++++--- src/include/storage/index/hash_index_utils.h | 16 ++++++--- test/test_files/update_node/create_empty.test | 32 ----------------- 6 files changed, 29 insertions(+), 76 deletions(-) diff --git a/src/include/function/hash/hash_functions.h b/src/include/function/hash/hash_functions.h index 9cfd61f3944..0e85b727799 100644 --- a/src/include/function/hash/hash_functions.h +++ b/src/include/function/hash/hash_functions.h @@ -9,7 +9,6 @@ #include "common/types/int128_t.h" #include "common/types/interval_t.h" #include "common/types/ku_string.h" -#include "common/types/types.h" #include "common/vector/value_vector.h" namespace kuzu { @@ -134,52 +133,31 @@ inline void Hash::operation( template<> inline void Hash::operation( const double& key, common::hash_t& result, common::ValueVector* /*keyVector*/) { - // 0 and -0 are not byte-equivalent, but should have the same hash - if (key == 0) { - result = murmurhash64(0); - } else { - result = murmurhash64(*reinterpret_cast(&key)); - } + result = std::hash()(key); } template<> inline void Hash::operation( const float& key, common::hash_t& result, common::ValueVector* /*keyVector*/) { - // 0 and -0 are not byte-equivalent, but should have the same hash - if (key == 0) { - result = murmurhash64(0); - } else { - result = murmurhash64(*reinterpret_cast(&key)); - } + result = std::hash()(key); } template<> inline void Hash::operation( - const std::string_view& key, common::hash_t& result, common::ValueVector* /*keyVector*/) { - common::hash_t hashValue = 0; - auto data64 = reinterpret_cast(key.data()); - for (size_t i = 0u; i < key.size() / 8; i++) { - auto blockHash = kuzu::function::murmurhash64(*(data64 + i)); - hashValue = kuzu::function::combineHashScalar(hashValue, blockHash); - } - uint64_t last = 0; - for (size_t i = 0u; i < key.size() % 8; i++) { - last |= key[key.size() / 8 * 8 + i] << i * 8; - } - hashValue = kuzu::function::combineHashScalar(hashValue, kuzu::function::murmurhash64(last)); - result = hashValue; + const std::string& key, common::hash_t& result, common::ValueVector* /*keyVector*/) { + result = std::hash()(key); } template<> inline void Hash::operation( - const std::string& key, common::hash_t& result, common::ValueVector* /*keyVector*/) { - Hash::operation(std::string_view(key), result); + const std::string_view& key, common::hash_t& result, common::ValueVector* /*keyVector*/) { + result = std::hash()(key); } template<> inline void Hash::operation( const common::ku_string_t& key, common::hash_t& result, common::ValueVector* /*keyVector*/) { - Hash::operation(key.getAsStringView(), result); + result = std::hash()(key.getAsStringView()); } template<> diff --git a/src/include/processor/operator/persistent/index_builder.h b/src/include/processor/operator/persistent/index_builder.h index 529e28e0724..7069d780a9f 100644 --- a/src/include/processor/operator/persistent/index_builder.h +++ b/src/include/processor/operator/persistent/index_builder.h @@ -66,10 +66,10 @@ class IndexBuilderLocalBuffers { explicit IndexBuilderLocalBuffers(IndexBuilderGlobalQueues& globalQueues); void insert(std::string key, common::offset_t value) { - auto indexPos = storage::HashIndexUtils::getHashIndexPosition(std::string_view(key)); + auto indexPos = storage::getHashIndexPosition(key.c_str()); auto& stringBuffer = (*std::get>(buffers))[indexPos]; if (stringBuffer.full()) { - // StaticVector's move constructor leaves the original vector valid and empty + // StaticVector's move constructor leavse the original vector valid and empty globalQueues->insert(indexPos, std::move(stringBuffer)); } stringBuffer.push_back(std::make_pair(key, value)); // NOLINT(bugprone-use-after-move) @@ -77,7 +77,7 @@ class IndexBuilderLocalBuffers { template void insert(T key, common::offset_t value) { - auto indexPos = storage::HashIndexUtils::getHashIndexPosition(key); + auto indexPos = storage::getHashIndexPosition(key); auto& buffer = (*std::get>(buffers))[indexPos]; if (buffer.full()) { globalQueues->insert(indexPos, std::move(buffer)); diff --git a/src/include/storage/index/hash_index.h b/src/include/storage/index/hash_index.h index 79b7f6366b4..1dde9f4dfcc 100644 --- a/src/include/storage/index/hash_index.h +++ b/src/include/storage/index/hash_index.h @@ -207,7 +207,7 @@ class PrimaryKeyIndex { template inline HashIndex* getTypedHashIndex(T key) { return common::ku_dynamic_cast*>( - hashIndices[HashIndexUtils::getHashIndexPosition(key)].get()); + hashIndices[getHashIndexPosition(key)].get()); } inline bool lookup( diff --git a/src/include/storage/index/hash_index_builder.h b/src/include/storage/index/hash_index_builder.h index aaf2e81bc48..6f9ac0b7716 100644 --- a/src/include/storage/index/hash_index_builder.h +++ b/src/include/storage/index/hash_index_builder.h @@ -138,32 +138,31 @@ class PrimaryKeyIndexBuilder { // enough space already. template bool append(T key, common::offset_t value) { - return appendWithIndexPos(key, value, HashIndexUtils::getHashIndexPosition(key)); + return appendWithIndexPos(key, value, getHashIndexPosition(key)); } bool append(std::string_view key, common::offset_t value) { - return appendWithIndexPos(key, value, HashIndexUtils::getHashIndexPosition(key)); + return appendWithIndexPos(key, value, getHashIndexPosition(key)); } template bool appendWithIndexPos(T key, common::offset_t value, uint64_t indexPos) { KU_ASSERT(keyDataTypeID == common::TypeUtils::getPhysicalTypeIDForType()); - KU_ASSERT(HashIndexUtils::getHashIndexPosition(key) == indexPos); + KU_ASSERT(getHashIndexPosition(key) == indexPos); return getTypedHashIndex(indexPos)->append(key, value); } bool appendWithIndexPos(std::string_view key, common::offset_t value, uint64_t indexPos) { KU_ASSERT(keyDataTypeID == common::PhysicalTypeID::STRING); - KU_ASSERT(HashIndexUtils::getHashIndexPosition(key) == indexPos); + KU_ASSERT(getHashIndexPosition(key) == indexPos); return getTypedHashIndex(indexPos)->append( key, value); } template bool lookup(T key, common::offset_t& result) { KU_ASSERT(keyDataTypeID == common::TypeUtils::getPhysicalTypeIDForType()); - return getTypedHashIndex(HashIndexUtils::getHashIndexPosition(key))->lookup(key, result); + return getTypedHashIndex(getHashIndexPosition(key))->lookup(key, result); } bool lookup(std::string_view key, common::offset_t& result) { KU_ASSERT(keyDataTypeID == common::PhysicalTypeID::STRING); - return getTypedHashIndex( - HashIndexUtils::getHashIndexPosition(key)) + return getTypedHashIndex(getHashIndexPosition(key)) ->lookup(key, result); } diff --git a/src/include/storage/index/hash_index_utils.h b/src/include/storage/index/hash_index_utils.h index 24a1bd8f2c0..26f9c86047c 100644 --- a/src/include/storage/index/hash_index_utils.h +++ b/src/include/storage/index/hash_index_utils.h @@ -1,5 +1,7 @@ #pragma once +#include + #include "common/types/ku_string.h" #include "common/types/types.h" #include "function/hash/hash_functions.h" @@ -26,6 +28,16 @@ static constexpr common::page_idx_t O_SLOTS_HEADER_PAGE_IDX = 2; static constexpr common::page_idx_t NUM_HEADER_PAGES = 3; static constexpr uint64_t INDEX_HEADER_IDX_IN_ARRAY = 0; +inline uint64_t getHashIndexPosition(common::HashablePrimitive auto key) { + common::hash_t hash; + function::Hash::operation(key, hash, nullptr /*keyVector*/); + return (hash >> (64 - NUM_HASH_INDEXES_LOG2)) & (NUM_HASH_INDEXES - 1); +} +inline uint64_t getHashIndexPosition(std::string_view key) { + return (std::hash()(key) >> (64 - NUM_HASH_INDEXES_LOG2)) & + (NUM_HASH_INDEXES - 1); +} + enum class SlotType : uint8_t { PRIMARY = 0, OVF = 1 }; struct SlotInfo { @@ -65,10 +77,6 @@ class HashIndexUtils { return slotId; } - inline static uint64_t getHashIndexPosition(common::IndexHashable auto key) { - return (HashIndexUtils::hash(key) >> (64 - NUM_HASH_INDEXES_LOG2)) & (NUM_HASH_INDEXES - 1); - } - static inline uint64_t getNumRequiredEntries( uint64_t numExistingEntries, uint64_t numNewEntries) { return ceil((double)(numExistingEntries + numNewEntries) * common::DEFAULT_HT_LOAD_FACTOR); diff --git a/test/test_files/update_node/create_empty.test b/test/test_files/update_node/create_empty.test index b2abd0f580c..068e69062cf 100644 --- a/test/test_files/update_node/create_empty.test +++ b/test/test_files/update_node/create_empty.test @@ -353,38 +353,6 @@ foobar-ewe-j323-8nd*-ewew -STATEMENT MATCH (t:test) WHERE t.id = to_float(0.1) RETURN t.id; ---- 1 0.100000 --STATEMENT CREATE (t:test {id:0}); ----- ok -# Zero and negative zero need to be equivalent --STATEMENT MATCH (t:test) WHERE t.id = (0.0 * -1) RETURN t.id; ----- 1 -0.000000 --STATEMENT MATCH (t:test) WHERE t.id = -0.0 RETURN t.id; ----- 1 -0.000000 --STATEMENT MATCH (t:test) WHERE t.id = to_float("-0.0") RETURN t.id; ----- 1 -0.000000 -# infinity should work --STATEMENT CREATE (t:test {id:1.0/0.0}); ----- ok --STATEMENT MATCH (t:test) WHERE t.id = 1.0/0.0 RETURN t.id; ----- 1 -inf --STATEMENT MATCH (t:test) WHERE t.id = to_float("inf") RETURN t.id; ----- 1 -inf --STATEMENT MATCH (t:test) WHERE t.id = to_float("-inf") RETURN t.id; ----- 0 -# NaN should not be searchable --STATEMENT CREATE (t:test {id:0.0/0.0}); ----- ok --STATEMENT MATCH (t:test) WHERE t.id = 0.0/0.0 RETURN t.id; ----- 0 --STATEMENT CREATE (t:test {id:sqrt(-1.0)}); ----- ok --STATEMENT MATCH (t:test) WHERE t.id = sqrt(-1.0) RETURN t.id; ----- 0 -CASE CreateBlobPK -STATEMENT CREATE NODE TABLE test(id BLOB, PRIMARY KEY(id));