From 708d41bd26811881368b7d06d2ab9fb02c1b82f3 Mon Sep 17 00:00:00 2001 From: Alexander Root <32245479+rootjalex@users.noreply.github.com> Date: Mon, 21 Aug 2023 11:45:05 -0700 Subject: [PATCH 1/5] Don't introduce reinterprets in find/lower intrinsics (#7776) --- src/FindIntrinsics.cpp | 51 ++++++++++++++++++++++-------------------- src/IR.h | 7 ++++++ 2 files changed, 34 insertions(+), 24 deletions(-) diff --git a/src/FindIntrinsics.cpp b/src/FindIntrinsics.cpp index 4783d7cfdedc..29a8913e1068 100644 --- a/src/FindIntrinsics.cpp +++ b/src/FindIntrinsics.cpp @@ -157,16 +157,19 @@ Expr to_rounding_shift(const Call *c) { return rounding_shift(cast(add->type, add->args[0]), b); } } - // Also need to handle the annoying case of a reinterpret wrapping a widen_right_add + + // Also need to handle the annoying case of a reinterpret cast wrapping a widen_right_add // TODO: this pattern makes me want to change the semantics of this op. - if (const Reinterpret *reinterp = a.as()) { - if (reinterp->type.bits() == reinterp->value.type().bits()) { - if (const Call *add = Call::as_intrinsic(reinterp->value, {Call::widen_right_add})) { + if (const Cast *cast = a.as()) { + if (cast->is_reinterpret()) { + if (const Call *add = Call::as_intrinsic(cast->value, {Call::widen_right_add})) { if (can_prove(lower_intrinsics(add->args[1] == round))) { - // We expect the first operand to be a reinterpet. - const Reinterpret *reinterp_a = add->args[0].as(); - internal_assert(reinterp_a) << "Failed: " << add->args[0] << "\n"; - return rounding_shift(reinterp_a->value, b); + // We expect the first operand to be a reinterpet cast. + if (const Cast *cast_a = add->args[0].as()) { + if (cast_a->is_reinterpret()) { + return rounding_shift(cast_a->value, b); + } + } } } } @@ -245,9 +248,9 @@ class FindIntrinsics : public IRMutator { if (b.type().code() != narrow_a.type().code()) { // Need to do a safe reinterpret. 
Type t = b.type().with_code(code); - result = widen_right_add(reinterpret(t, b), narrow_a); + result = widen_right_add(cast(t, b), narrow_a); internal_assert(result.type() != op->type); - result = reinterpret(op->type, result); + result = cast(op->type, result); } else { result = widen_right_add(b, narrow_a); } @@ -258,9 +261,9 @@ class FindIntrinsics : public IRMutator { if (a.type().code() != narrow_b.type().code()) { // Need to do a safe reinterpret. Type t = a.type().with_code(code); - result = widen_right_add(reinterpret(t, a), narrow_b); + result = widen_right_add(cast(t, a), narrow_b); internal_assert(result.type() != op->type); - result = reinterpret(op->type, result); + result = cast(op->type, result); } else { result = widen_right_add(a, narrow_b); } @@ -328,9 +331,9 @@ class FindIntrinsics : public IRMutator { if (a.type().code() != narrow_b.type().code()) { // Need to do a safe reinterpret. Type t = a.type().with_code(code); - result = widen_right_sub(reinterpret(t, a), narrow_b); + result = widen_right_sub(cast(t, a), narrow_b); internal_assert(result.type() != op->type); - result = reinterpret(op->type, result); + result = cast(op->type, result); } else { result = widen_right_sub(a, narrow_b); } @@ -410,9 +413,9 @@ class FindIntrinsics : public IRMutator { if (b.type().code() != narrow_a.type().code()) { // Need to do a safe reinterpret. Type t = b.type().with_code(code); - result = widen_right_mul(reinterpret(t, b), narrow_a); + result = widen_right_mul(cast(t, b), narrow_a); internal_assert(result.type() != op->type); - result = reinterpret(op->type, result); + result = cast(op->type, result); } else { result = widen_right_mul(b, narrow_a); } @@ -423,9 +426,9 @@ class FindIntrinsics : public IRMutator { if (a.type().code() != narrow_b.type().code()) { // Need to do a safe reinterpret. 
Type t = a.type().with_code(code); - result = widen_right_mul(reinterpret(t, a), narrow_b); + result = widen_right_mul(cast(t, a), narrow_b); internal_assert(result.type() != op->type); - result = reinterpret(op->type, result); + result = cast(op->type, result); } else { result = widen_right_mul(a, narrow_b); } @@ -1261,8 +1264,8 @@ Expr lower_saturating_add(const Expr &a, const Expr &b) { return select(sum < a, a.type().max(), sum); } else if (a.type().is_int()) { Type u = a.type().with_code(halide_type_uint); - Expr ua = reinterpret(u, a); - Expr ub = reinterpret(u, b); + Expr ua = cast(u, a); + Expr ub = cast(u, b); Expr upper = make_const(u, (uint64_t(1) << (a.type().bits() - 1)) - 1); Expr lower = make_const(u, (uint64_t(1) << (a.type().bits() - 1))); Expr sum = ua + ub; @@ -1272,7 +1275,7 @@ Expr lower_saturating_add(const Expr &a, const Expr &b) { // a + b >= 0 === a >= -b === a >= ~b + 1 === a > ~b Expr pos_result = min(sum, upper); Expr neg_result = max(sum, lower); - return simplify(reinterpret(a.type(), select(~b < a, pos_result, neg_result))); + return simplify(cast(a.type(), select(~b < a, pos_result, neg_result))); } else { internal_error << "Bad type for saturating_add: " << a.type() << "\n"; return Expr(); @@ -1288,8 +1291,8 @@ Expr lower_saturating_sub(const Expr &a, const Expr &b) { } else if (a.type().is_int()) { // Do the math in unsigned, to avoid overflow in the simplifier. Type u = a.type().with_code(halide_type_uint); - Expr ua = reinterpret(u, a); - Expr ub = reinterpret(u, b); + Expr ua = cast(u, a); + Expr ub = cast(u, b); Expr upper = make_const(u, (uint64_t(1) << (a.type().bits() - 1)) - 1); Expr lower = make_const(u, (uint64_t(1) << (a.type().bits() - 1))); Expr diff = ua - ub; @@ -1300,7 +1303,7 @@ Expr lower_saturating_sub(const Expr &a, const Expr &b) { // and saturate the negative difference to be at least -2^31 + 2^32 = 2^31 Expr neg_diff = max(lower, diff); // Then select between them, and cast back to the signed type. 
- return simplify(reinterpret(a.type(), select(b <= a, pos_diff, neg_diff))); + return simplify(cast(a.type(), select(b <= a, pos_diff, neg_diff))); } else if (a.type().is_uint()) { return simplify(select(b < a, a - b, make_zero(a.type()))); } else { diff --git a/src/IR.h b/src/IR.h index c187053db81d..ba4fcb09b587 100644 --- a/src/IR.h +++ b/src/IR.h @@ -32,6 +32,13 @@ struct Cast : public ExprNode { static Expr make(Type t, Expr v); static const IRNodeType _node_type = IRNodeType::Cast; + + /** Check if the cast is equivalent to a reinterpret. */ + bool is_reinterpret() const { + return (type.is_int_or_uint() && + value.type().is_int_or_uint() && + type.bits() == value.type().bits()); + } }; /** Reinterpret value as another type, without affecting any of the bits From fcc1c3b24427439ea0443474f2f90d44b8c6c32f Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar Date: Mon, 21 Aug 2023 16:16:51 -0500 Subject: [PATCH 2/5] [Hexagon] -Build Hexagon runtime components using the Hexagon SDK (Clone of #7671) (#7741) * Add CMakeLists.txt to build the hexagon_remote runtime. * Print an error message if libhalide_hexagon_host.so is not found. 
* Fix case mismatch in hexagon_remote/CMakeLists.txt * Remove some code that had been commented out in hexagon_remote/CMakeLists.txt * Remove unused argument in macro in hexagon_remote/CMakeLists.txt * add find module for Hexagon * move more variables to find module * Build binary modules with ExternalProject * group platform-specific sources into subdirectories * Pass HEXAGON_TOOLS_ROOT, too * Use the desired layout for the build-tree artifacts * Use SYSTEM for Hexagon SDK include dirs * trigger buildbots * Ignore code in src/runtime/hexagon_remote/bin/src for clang-tidy * Just skip hexagon_remote entirely for Halide_CLANG_TIDY_BUILD * Add an option to enable the building of the hexagon remote runtime --------- Co-authored-by: Alex Reinking Co-authored-by: Steven Johnson --- cmake/FindHexagonSDK.cmake | 92 +++++++++++++++++++ src/runtime/CMakeLists.txt | 4 + src/runtime/hexagon_host.cpp | 6 +- src/runtime/hexagon_remote/CMakeLists.txt | 74 +++++++++++++++ .../hexagon_remote/android/CMakeLists.txt | 41 +++++++++ .../{ => android}/host_malloc.cpp | 0 .../{ => android}/host_shim.cpp | 0 .../{ => android}/libadsprpc_shim.cpp | 0 .../hexagon_remote/qurt/CMakeLists.txt | 43 +++++++++ .../hexagon_remote/{ => qurt}/c11_stubs.cpp | 0 .../{ => qurt}/halide_remote.cpp | 4 +- .../{ => qurt}/known_symbols.cpp | 0 .../hexagon_remote/{ => qurt}/known_symbols.h | 0 src/runtime/hexagon_remote/{ => qurt}/log.cpp | 0 src/runtime/hexagon_remote/{ => qurt}/log.h | 0 .../hexagon_remote/{ => qurt}/nearbyint.cpp | 0 .../hexagon_remote/{ => qurt}/sim_qurt.cpp | 0 .../{ => qurt}/sim_qurt_vtcm.cpp | 0 .../hexagon_remote/{ => qurt}/sim_remote.cpp | 0 19 files changed, 261 insertions(+), 3 deletions(-) create mode 100644 cmake/FindHexagonSDK.cmake create mode 100644 src/runtime/hexagon_remote/CMakeLists.txt create mode 100644 src/runtime/hexagon_remote/android/CMakeLists.txt rename src/runtime/hexagon_remote/{ => android}/host_malloc.cpp (100%) rename src/runtime/hexagon_remote/{ => 
android}/host_shim.cpp (100%) rename src/runtime/hexagon_remote/{ => android}/libadsprpc_shim.cpp (100%) create mode 100644 src/runtime/hexagon_remote/qurt/CMakeLists.txt rename src/runtime/hexagon_remote/{ => qurt}/c11_stubs.cpp (100%) rename src/runtime/hexagon_remote/{ => qurt}/halide_remote.cpp (99%) rename src/runtime/hexagon_remote/{ => qurt}/known_symbols.cpp (100%) rename src/runtime/hexagon_remote/{ => qurt}/known_symbols.h (100%) rename src/runtime/hexagon_remote/{ => qurt}/log.cpp (100%) rename src/runtime/hexagon_remote/{ => qurt}/log.h (100%) rename src/runtime/hexagon_remote/{ => qurt}/nearbyint.cpp (100%) rename src/runtime/hexagon_remote/{ => qurt}/sim_qurt.cpp (100%) rename src/runtime/hexagon_remote/{ => qurt}/sim_qurt_vtcm.cpp (100%) rename src/runtime/hexagon_remote/{ => qurt}/sim_remote.cpp (100%) diff --git a/cmake/FindHexagonSDK.cmake b/cmake/FindHexagonSDK.cmake new file mode 100644 index 000000000000..9ee72008254b --- /dev/null +++ b/cmake/FindHexagonSDK.cmake @@ -0,0 +1,92 @@ +include(FindPackageHandleStandardArgs) + +## +# Find the Hexagon SDK root + +# We use the presence of the hexagon toolchain file to determine the SDK +# root. Other files have names that are too generic (like readme.txt) or +# are platform-specific (like setup_sdk_env.source) and so can't be +# used to autodetect the path. Plus, we need to find this file anyway. + +find_path( + HEXAGON_SDK_ROOT build/cmake/hexagon_toolchain.cmake + HINTS ENV HEXAGON_SDK_ROOT +) + +## +# Detect the installed Hexagon tools version + +if (NOT DEFINED HEXAGON_TOOLS_VER AND DEFINED ENV{HEXAGON_TOOLS_VER}) + set(HEXAGON_TOOLS_VER "$ENV{HEXAGON_TOOLS_VER}") +endif () + +if (NOT DEFINED HEXAGON_TOOLS_VER) + # No other way to list a directory; no need for CONFIGURE_DEPENDS here + # since this is just used to initialize a cache variable. 
+ file( + GLOB tools_versions + RELATIVE "${HEXAGON_SDK_ROOT}/tools/HEXAGON_Tools" + "${HEXAGON_SDK_ROOT}/tools/HEXAGON_Tools/*" + ) + if (NOT tools_versions STREQUAL "") + list(GET tools_versions 0 HEXAGON_TOOLS_VER) + endif () +endif () + +set(HEXAGON_TOOLS_VER "${HEXAGON_TOOLS_VER}" + CACHE STRING "Version of the Hexagon tools to use") + +set(HEXAGON_TOOLS_ROOT "${HEXAGON_SDK_ROOT}/tools/HEXAGON_Tools/${HEXAGON_TOOLS_VER}") + +## +# Set known paths + +set(HEXAGON_TOOLCHAIN ${HEXAGON_SDK_ROOT}/build/cmake/hexagon_toolchain.cmake) +set(HEXAGON_QAIC ${HEXAGON_SDK_ROOT}/ipc/fastrpc/qaic/Ubuntu16/qaic) + +set(ANDROID_NDK_ROOT ${HEXAGON_SDK_ROOT}/tools/android-ndk-r19c) +set(ANDROID_NDK_TOOLCHAIN ${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake) + +## +# Find ISS wrapper library and headers + +find_library( + HEXAGON_ISS_WRAPPER_LIBRARY + NAMES wrapper + HINTS "${HEXAGON_TOOLS_ROOT}" + PATH_SUFFIXES Tools/lib/iss lib/iss iss +) + +find_path( + HEXAGON_ISS_WRAPPER_INCLUDE_DIRECTORY + NAMES HexagonWrapper.h + HINTS "${HEXAGON_TOOLS_ROOT}" + PATH_SUFFIXES Tools/include/iss include/iss iss +) + +## +# Validate we found everything correctly + +find_package_handle_standard_args( + HexagonSDK + REQUIRED_VARS + HEXAGON_SDK_ROOT + HEXAGON_TOOLS_ROOT + HEXAGON_TOOLCHAIN + HEXAGON_ISS_WRAPPER_LIBRARY + HEXAGON_ISS_WRAPPER_INCLUDE_DIRECTORY + HANDLE_COMPONENTS +) + +## +# Create imported targets + +if (HexagonSDK_FOUND AND NOT TARGET HexagonSDK::wrapper) + add_library(HexagonSDK::wrapper UNKNOWN IMPORTED) + set_target_properties( + HexagonSDK::wrapper + PROPERTIES + IMPORTED_LOCATION "${HEXAGON_ISS_WRAPPER_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${HEXAGON_ISS_WRAPPER_INCLUDE_DIRECTORY}" + ) +endif () diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 71af475c2eb4..b1331ed07e52 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -338,4 +338,8 @@ add_library(Halide_Runtime INTERFACE) add_library(Halide::Runtime ALIAS 
Halide_Runtime) target_include_directories(Halide_Runtime INTERFACE $) set_target_properties(Halide_Runtime PROPERTIES EXPORT_NAME Runtime) +option(Halide_BUILD_HEXAGON_REMOTE_RUNTIME "Build the hexagon remote runtime for offloading to Hexagon (HVX)" OFF) +if (Halide_BUILD_HEXAGON_REMOTE_RUNTIME AND NOT Halide_CLANG_TIDY_BUILD) + add_subdirectory(hexagon_remote) +endif () diff --git a/src/runtime/hexagon_host.cpp b/src/runtime/hexagon_host.cpp index 98a5bcd943ef..7035741a9fdf 100644 --- a/src/runtime/hexagon_host.cpp +++ b/src/runtime/hexagon_host.cpp @@ -147,7 +147,11 @@ WEAK int init_hexagon_runtime(void *user_context) { if (!host_lib) { host_lib = halide_load_library("libhalide_hexagon_host.dll"); } - + if (!host_lib) { + // This will now cause a more specific error 'halide_error_code_symbol_not_found' down the line. + // So, just print this message and continue on instead of returning a generic error here. + error(user_context) << "Hexagon: unable to load libhalide_hexagon_host.so"; + } debug(user_context) << "Hexagon: init_hexagon_runtime (user_context: " << user_context << ")\n"; // Get the symbols we need from the library. 
diff --git a/src/runtime/hexagon_remote/CMakeLists.txt b/src/runtime/hexagon_remote/CMakeLists.txt new file mode 100644 index 000000000000..c5fe73ab7405 --- /dev/null +++ b/src/runtime/hexagon_remote/CMakeLists.txt @@ -0,0 +1,74 @@ +include(ExternalProject) + +find_package(HexagonSDK REQUIRED) + +add_custom_command( + OUTPUT + halide_hexagon_remote.h + halide_hexagon_remote_skel.c + halide_hexagon_remote_stub.c + COMMAND ${HEXAGON_QAIC} -I ${HEXAGON_SDK_ROOT}/incs/stddef ${CMAKE_CURRENT_SOURCE_DIR}/halide_hexagon_remote.idl + DEPENDS halide_hexagon_remote.idl + VERBATIM +) + +add_custom_target( + halide_hexagon_remote_idl + DEPENDS + ${CMAKE_CURRENT_BINARY_DIR}/halide_hexagon_remote.h + ${CMAKE_CURRENT_BINARY_DIR}/halide_hexagon_remote_skel.c + ${CMAKE_CURRENT_BINARY_DIR}/halide_hexagon_remote_stub.c +) + +set(common_cache_args + "-DHALIDE_HEXAGON_REMOTE_IDL:PATH=${CMAKE_CURRENT_BINARY_DIR}" + "-DHEXAGON_SDK_ROOT:PATH=${HEXAGON_SDK_ROOT}" + "-DHEXAGON_TOOLS_ROOT:PATH=${HEXAGON_TOOLS_ROOT}" + "-DCMAKE_INSTALL_PREFIX:PATH=" +) + +if (CMAKE_BUILD_TYPE) + list(APPEND common_cache_args "-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}") +endif () + +ExternalProject_Add( + hexagon_remote-qurt + SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/qurt" + CMAKE_CACHE_ARGS + "-DCMAKE_TOOLCHAIN_FILE:FILEPATH=${HEXAGON_TOOLCHAIN}" + ${common_cache_args} + PREFIX hexagon + DEPENDS halide_hexagon_remote_idl + CONFIGURE_HANDLED_BY_BUILD ON +) + +set(arm_abis armeabi-v7a arm64-v8a) +set(arm_bits 32 64) +foreach (abi bits IN ZIP_LISTS arm_abis arm_bits) + ExternalProject_Add( + halide_hexagon_host-${abi} + SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/android" + CMAKE_CACHE_ARGS + "-DCMAKE_TOOLCHAIN_FILE:FILEPATH=${ANDROID_NDK_TOOLCHAIN}" + "-DANDROID_ABI:STRING=${abi}" + "-DANDROID_PLATFORM:STRING=21" + ${common_cache_args} + PREFIX arm-${bits}-android + DEPENDS halide_hexagon_remote_idl + CONFIGURE_HANDLED_BY_BUILD ON + ) +endforeach () + +add_library(halide_hexagon_host SHARED sim_host.cpp 
sim_protocol.h) +target_compile_features(halide_hexagon_host PRIVATE cxx_std_17) +target_include_directories(halide_hexagon_host PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..) +target_link_libraries(halide_hexagon_host PRIVATE HexagonSDK::wrapper) + +add_custom_target(hexagon_remote) +add_dependencies( + hexagon_remote + hexagon_remote-qurt + halide_hexagon_host + halide_hexagon_host-armeabi-v7a + halide_hexagon_host-arm64-v8a +) diff --git a/src/runtime/hexagon_remote/android/CMakeLists.txt b/src/runtime/hexagon_remote/android/CMakeLists.txt new file mode 100644 index 000000000000..1e7465f68f75 --- /dev/null +++ b/src/runtime/hexagon_remote/android/CMakeLists.txt @@ -0,0 +1,41 @@ +cmake_minimum_required(VERSION 3.22) +project(halide-hexagon_remote-android) + +set(_aarch64 "") +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") + set(_aarch64 "_aarch64") +endif () + +add_library(fastrpc::cdsprpc SHARED IMPORTED) +set_target_properties( + fastrpc::cdsprpc + PROPERTIES + IMPORTED_LOCATION "${HEXAGON_SDK_ROOT}/ipc/fastrpc/remote/ship/android${_aarch64}/libcdsprpc.so" +) + +add_library( + halide_hexagon_host + MODULE + ${HALIDE_HEXAGON_REMOTE_IDL}/halide_hexagon_remote_stub.c + host_malloc.cpp + host_shim.cpp + libadsprpc_shim.cpp +) +target_include_directories( + halide_hexagon_host + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../.. 
+) +target_include_directories( + halide_hexagon_host + SYSTEM PRIVATE + ${HALIDE_HEXAGON_REMOTE_IDL} + ${HEXAGON_SDK_ROOT}/incs + ${HEXAGON_SDK_ROOT}/incs/stddef +) +target_link_libraries(halide_hexagon_host PRIVATE fastrpc::cdsprpc log) + +install( + TARGETS halide_hexagon_host + DESTINATION bin +) diff --git a/src/runtime/hexagon_remote/host_malloc.cpp b/src/runtime/hexagon_remote/android/host_malloc.cpp similarity index 100% rename from src/runtime/hexagon_remote/host_malloc.cpp rename to src/runtime/hexagon_remote/android/host_malloc.cpp diff --git a/src/runtime/hexagon_remote/host_shim.cpp b/src/runtime/hexagon_remote/android/host_shim.cpp similarity index 100% rename from src/runtime/hexagon_remote/host_shim.cpp rename to src/runtime/hexagon_remote/android/host_shim.cpp diff --git a/src/runtime/hexagon_remote/libadsprpc_shim.cpp b/src/runtime/hexagon_remote/android/libadsprpc_shim.cpp similarity index 100% rename from src/runtime/hexagon_remote/libadsprpc_shim.cpp rename to src/runtime/hexagon_remote/android/libadsprpc_shim.cpp diff --git a/src/runtime/hexagon_remote/qurt/CMakeLists.txt b/src/runtime/hexagon_remote/qurt/CMakeLists.txt new file mode 100644 index 000000000000..1a0f42566752 --- /dev/null +++ b/src/runtime/hexagon_remote/qurt/CMakeLists.txt @@ -0,0 +1,43 @@ +cmake_minimum_required(VERSION 3.22) + +# The Hexagon toolchain is broken +set(ENV{HEXAGON_SDK_ROOT} "${HEXAGON_SDK_ROOT}") +set(ENV{HEXAGON_TOOLS_ROOT} "${HEXAGON_TOOLS_ROOT}") + +project(halide-hexagon_remote-qurt) + +add_library(sim_qurt STATIC sim_qurt.cpp sim_qurt_vtcm.cpp) +target_include_directories(sim_qurt SYSTEM PRIVATE ${HALIDE_HEXAGON_REMOTE_IDL}) + +add_executable( + hexagon_sim_remote + known_symbols.cpp + sim_remote.cpp + $ +) +target_include_directories( + hexagon_sim_remote + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/../.. 
+) +target_include_directories(hexagon_sim_remote SYSTEM PRIVATE ${HALIDE_HEXAGON_REMOTE_IDL}) +target_link_libraries(hexagon_sim_remote PRIVATE ${CMAKE_DL_LIBS}) + +add_library( + halide_hexagon_remote_skel + MODULE + c11_stubs.cpp + halide_remote.cpp + known_symbols.cpp + log.cpp + nearbyint.cpp + ${HALIDE_HEXAGON_REMOTE_IDL}/halide_hexagon_remote_skel.c +) +target_include_directories(halide_hexagon_remote_skel PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../..) +target_include_directories(halide_hexagon_remote_skel SYSTEM PRIVATE ${HALIDE_HEXAGON_REMOTE_IDL}) + +install( + TARGETS sim_qurt hexagon_sim_remote halide_hexagon_remote_skel + DESTINATION bin +) diff --git a/src/runtime/hexagon_remote/c11_stubs.cpp b/src/runtime/hexagon_remote/qurt/c11_stubs.cpp similarity index 100% rename from src/runtime/hexagon_remote/c11_stubs.cpp rename to src/runtime/hexagon_remote/qurt/c11_stubs.cpp diff --git a/src/runtime/hexagon_remote/halide_remote.cpp b/src/runtime/hexagon_remote/qurt/halide_remote.cpp similarity index 99% rename from src/runtime/hexagon_remote/halide_remote.cpp rename to src/runtime/hexagon_remote/qurt/halide_remote.cpp index d990caeeb89b..2af8b8606cdb 100644 --- a/src/runtime/hexagon_remote/halide_remote.cpp +++ b/src/runtime/hexagon_remote/qurt/halide_remote.cpp @@ -15,8 +15,8 @@ extern "C" { #include "known_symbols.h" #include "log.h" -const int stack_alignment = 128; -const int stack_size = 1024 * 1024; +// const int stack_alignment = 128; +// const int stack_size = 1024 * 1024; typedef halide_hexagon_remote_handle_t handle_t; typedef halide_hexagon_remote_buffer buffer; diff --git a/src/runtime/hexagon_remote/known_symbols.cpp b/src/runtime/hexagon_remote/qurt/known_symbols.cpp similarity index 100% rename from src/runtime/hexagon_remote/known_symbols.cpp rename to src/runtime/hexagon_remote/qurt/known_symbols.cpp diff --git a/src/runtime/hexagon_remote/known_symbols.h b/src/runtime/hexagon_remote/qurt/known_symbols.h similarity index 100% rename from 
src/runtime/hexagon_remote/known_symbols.h rename to src/runtime/hexagon_remote/qurt/known_symbols.h diff --git a/src/runtime/hexagon_remote/log.cpp b/src/runtime/hexagon_remote/qurt/log.cpp similarity index 100% rename from src/runtime/hexagon_remote/log.cpp rename to src/runtime/hexagon_remote/qurt/log.cpp diff --git a/src/runtime/hexagon_remote/log.h b/src/runtime/hexagon_remote/qurt/log.h similarity index 100% rename from src/runtime/hexagon_remote/log.h rename to src/runtime/hexagon_remote/qurt/log.h diff --git a/src/runtime/hexagon_remote/nearbyint.cpp b/src/runtime/hexagon_remote/qurt/nearbyint.cpp similarity index 100% rename from src/runtime/hexagon_remote/nearbyint.cpp rename to src/runtime/hexagon_remote/qurt/nearbyint.cpp diff --git a/src/runtime/hexagon_remote/sim_qurt.cpp b/src/runtime/hexagon_remote/qurt/sim_qurt.cpp similarity index 100% rename from src/runtime/hexagon_remote/sim_qurt.cpp rename to src/runtime/hexagon_remote/qurt/sim_qurt.cpp diff --git a/src/runtime/hexagon_remote/sim_qurt_vtcm.cpp b/src/runtime/hexagon_remote/qurt/sim_qurt_vtcm.cpp similarity index 100% rename from src/runtime/hexagon_remote/sim_qurt_vtcm.cpp rename to src/runtime/hexagon_remote/qurt/sim_qurt_vtcm.cpp diff --git a/src/runtime/hexagon_remote/sim_remote.cpp b/src/runtime/hexagon_remote/qurt/sim_remote.cpp similarity index 100% rename from src/runtime/hexagon_remote/sim_remote.cpp rename to src/runtime/hexagon_remote/qurt/sim_remote.cpp From 6efecbe08068e43bb2ac3ec25091e9933ed7f73c Mon Sep 17 00:00:00 2001 From: Andrew Adams Date: Tue, 22 Aug 2023 08:49:29 -0700 Subject: [PATCH 3/5] slice IRMatcher should only match on slices (#7772) * slice IRMatcher should only match on slices Fixes #7768 * Add test --- src/IRMatch.h | 1 + test/correctness/simplify.cpp | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/src/IRMatch.h b/src/IRMatch.h index ad045e3789d0..a203fec51199 100644 --- a/src/IRMatch.h +++ b/src/IRMatch.h @@ -2101,6 +2101,7 @@ struct 
SliceOp { } const Shuffle &v = (const Shuffle &)e; return v.vectors.size() == 1 && + v.is_slice() && vec.template match(*v.vectors[0].get(), state) && base.template match::mask>(v.slice_begin(), state) && stride.template match::mask | bindings::mask>(v.slice_stride(), state) && diff --git a/test/correctness/simplify.cpp b/test/correctness/simplify.cpp index 18bf6947ba50..e6431a73c686 100644 --- a/test/correctness/simplify.cpp +++ b/test/correctness/simplify.cpp @@ -76,6 +76,20 @@ Expr slice(const Expr &e, int begin, int stride, int w) { return Shuffle::make_slice(e, begin, stride, w); } +// An arbitrary fixed permutation of the lanes of a single vector that isn't one +// of the classes above. Requires a power of two number of lanes. +Expr permute_lanes(const Expr &e) { + std::vector mask(e.type().lanes()); + for (int i = 0; i < e.type().lanes(); i++) { + mask[i] = i; + // Some arbitrary permutation + if (i & 1) { + std::swap(mask[i], mask[i / 2]); + } + } + return Shuffle::make({e}, std::move(mask)); +} + Expr ramp(const Expr &base, const Expr &stride, int w) { return Ramp::make(base, stride, w); } @@ -159,6 +173,11 @@ void check_casts() { check(slice(cast(UInt(64, 8), some_vector), 2, 1, 3), cast(UInt(64, 3), slice(some_vector, 2, 1, 3))); + // But we currently have no logic for pulling things outside of shuffles + // other than slices. + check(permute_lanes(some_vector) + permute_lanes(some_vector + 1), + permute_lanes(some_vector) + permute_lanes(some_vector + 1)); + std::vector indices(18); for (int i = 0; i < 18; i++) { indices[i] = i & 3; From acc9413084db07fd6c794e1d59b6eca0f72b140e Mon Sep 17 00:00:00 2001 From: Andrew Adams Date: Tue, 22 Aug 2023 08:49:44 -0700 Subject: [PATCH 4/5] Don't inject undef() in the simplifier (#7791) We shouldn't be using undef() in the simplifier. This replaces a load with a constant false predicate with a zero instead. I also added a guard around some dubious logic about out of bounds loads. 
out of bounds loads may be reachable if they have a false predicate, so I changed this simplification to only trigger if the load is unpredicated. --- src/Simplify_Exprs.cpp | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/src/Simplify_Exprs.cpp b/src/Simplify_Exprs.cpp index 574754686cc6..a8e5fcce1a8d 100644 --- a/src/Simplify_Exprs.cpp +++ b/src/Simplify_Exprs.cpp @@ -314,23 +314,26 @@ Expr Simplify::visit(const Load *op, ExprInfo *bounds) { ExprInfo index_info; Expr index = mutate(op->index, &index_info); - // If the load is fully out of bounds, replace it with undef. - // This should only occur inside branches that make the load unreachable, - // but perhaps the branch was hard to prove constant true or false. This - // provides an alternative mechanism to simplify these unreachable loads. - string alloc_extent_name = op->name + ".total_extent_bytes"; - if (bounds_and_alignment_info.contains(alloc_extent_name)) { - if (index_info.max_defined && index_info.max < 0) { - in_unreachable = true; - return unreachable(op->type); - } - const ExprInfo &alloc_info = bounds_and_alignment_info.get(alloc_extent_name); - if (alloc_info.max_defined && index_info.min_defined) { - int index_min_bytes = index_info.min * op->type.bytes(); - if (index_min_bytes > alloc_info.max) { + // If an unpredicated load is fully out of bounds, replace it with an + // unreachable intrinsic. This should only occur inside branches that make + // the load unreachable, but perhaps the branch was hard to prove constant + // true or false. This provides an alternative mechanism to simplify these + // unreachable loads. 
+ if (is_const_one(op->predicate)) { + string alloc_extent_name = op->name + ".total_extent_bytes"; + if (bounds_and_alignment_info.contains(alloc_extent_name)) { + if (index_info.max_defined && index_info.max < 0) { in_unreachable = true; return unreachable(op->type); } + const ExprInfo &alloc_info = bounds_and_alignment_info.get(alloc_extent_name); + if (alloc_info.max_defined && index_info.min_defined) { + int index_min_bytes = index_info.min * op->type.bytes(); + if (index_min_bytes > alloc_info.max) { + in_unreachable = true; + return unreachable(op->type); + } + } } } @@ -347,7 +350,7 @@ Expr Simplify::visit(const Load *op, ExprInfo *bounds) { const Shuffle *s_index = index.as(); if (is_const_zero(predicate)) { // Predicate is always false - return undef(op->type); + return make_zero(op->type); } else if (b_index && is_const_one(predicate)) { // Load of a broadcast should be broadcast of the load Expr new_index = b_index->value; From e8df5cfb65af7c7fb0de5f20b738546082c44b13 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Wed, 23 Aug 2023 11:05:37 -0700 Subject: [PATCH 5/5] Fix for top-of-tree LLVM (#7798) --- src/LLVM_Output.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/LLVM_Output.cpp b/src/LLVM_Output.cpp index 3106fec840a8..f0aadfb9a869 100644 --- a/src/LLVM_Output.cpp +++ b/src/LLVM_Output.cpp @@ -593,7 +593,11 @@ void create_static_library(const std::vector &src_files_in, const T return; } +#if LLVM_VERSION >= 180 + const llvm::SymtabWritingMode write_symtab = llvm::SymtabWritingMode::NormalSymtab; +#else const bool write_symtab = true; +#endif const auto kind = Internal::get_triple_for_target(target).isOSDarwin() ? llvm::object::Archive::K_BSD : llvm::object::Archive::K_GNU; const bool thin = false; auto result = llvm::writeArchive(dst_file, new_members,