diff --git a/python_bindings/src/halide/halide_/PyEnums.cpp b/python_bindings/src/halide/halide_/PyEnums.cpp index 6aa83534b4b4..b2a18e1435df 100644 --- a/python_bindings/src/halide/halide_/PyEnums.cpp +++ b/python_bindings/src/halide/halide_/PyEnums.cpp @@ -189,6 +189,7 @@ void define_enums(py::module &m) { .value("ARMv87a", Target::Feature::ARMv87a) .value("ARMv88a", Target::Feature::ARMv88a) .value("ARMv89a", Target::Feature::ARMv89a) + .value("ARM64e", Target::Feature::ARM64e) .value("SanitizerCoverage", Target::Feature::SanitizerCoverage) .value("ProfileByTimer", Target::Feature::ProfileByTimer) .value("SPIRV", Target::Feature::SPIRV) diff --git a/src/CodeGen_ARM.cpp b/src/CodeGen_ARM.cpp index 4cd05e3c7b5f..1119a61e3c9d 100644 --- a/src/CodeGen_ARM.cpp +++ b/src/CodeGen_ARM.cpp @@ -45,6 +45,11 @@ namespace { // // v8r has no relation to anything. Target complete_arm_target(Target t) { + // If arm64e is set, assume at least arm8.3a + if (t.has_feature(Target::ARM64e)) { + t.set_feature(Target::ARMv83a); + } + constexpr int num_arm_v8_features = 10; static const Target::Feature arm_v8_features[num_arm_v8_features] = { Target::ARMv89a, @@ -2473,7 +2478,11 @@ string CodeGen_ARM::mcpu_target() const { } } else { if (target.os == Target::IOS) { - return "apple-a7"; + if (target.has_feature(Target::ARM64e)) { + return "apple-a12"; + } else { + return "apple-a7"; + } } else if (target.os == Target::OSX) { return "apple-m1"; } else if (target.has_feature(Target::SVE2)) { diff --git a/src/LLVM_Runtime_Linker.cpp b/src/LLVM_Runtime_Linker.cpp index eda1a3b170c5..36d95f3adcd2 100644 --- a/src/LLVM_Runtime_Linker.cpp +++ b/src/LLVM_Runtime_Linker.cpp @@ -504,7 +504,11 @@ llvm::Triple get_triple_for_target(const Target &target) { } else { user_assert(target.bits == 64) << "Target bits must be 32 or 64\n"; #ifdef WITH_AARCH64 - triple.setArch(llvm::Triple::aarch64); + if (target.has_feature(Target::ARM64e)) { + triple.setArch(llvm::Triple::aarch64, llvm::Triple::AArch64SubArch_arm64e); + } else { + triple.setArch(llvm::Triple::aarch64); + } #else user_error << "AArch64 llvm target not enabled in this build of Halide\n"; #endif diff --git a/src/Target.cpp b/src/Target.cpp index 8d13e1e79677..12490fb82eea 100644 --- a/src/Target.cpp +++ b/src/Target.cpp @@ -698,6 +698,7 @@ const std::map feature_name_map = { {"armv87a", Target::ARMv87a}, {"armv88a", Target::ARMv88a}, {"armv89a", Target::ARMv89a}, + {"arm64e", Target::ARM64e}, {"sanitizer_coverage", Target::SanitizerCoverage}, {"profile_by_timer", Target::ProfileByTimer}, {"spirv", Target::SPIRV}, @@ -1504,7 +1505,7 @@ bool Target::get_runtime_compatible_target(const Target &other, Target &result) // (c) must match across both targets; it is an error if one target has the feature and the other doesn't // clang-format off - const std::array union_features = {{ + const std::array union_features = {{ // These are true union features. CUDA, D3D12Compute, @@ -1545,6 +1546,8 @@ bool Target::get_runtime_compatible_target(const Target &other, Target &result) ARMv87a, ARMv88a, ARMv89a, + + ARM64e, }}; // clang-format on diff --git a/src/Target.h b/src/Target.h index e48fa9ded8de..bdd8c8e03a12 100644 --- a/src/Target.h +++ b/src/Target.h @@ -164,6 +164,7 @@ struct Target { ARMv87a = halide_target_feature_armv87a, ARMv88a = halide_target_feature_armv88a, ARMv89a = halide_target_feature_armv89a, + ARM64e = halide_target_feature_arm64e, SanitizerCoverage = halide_target_feature_sanitizer_coverage, ProfileByTimer = halide_target_feature_profile_by_timer, SPIRV = halide_target_feature_spirv, diff --git a/src/runtime/HalideRuntime.h b/src/runtime/HalideRuntime.h index f9079d5d7cb0..8edd59ffc814 100644 --- a/src/runtime/HalideRuntime.h +++ b/src/runtime/HalideRuntime.h @@ -1444,6 +1444,7 @@ typedef enum halide_target_feature_t { halide_target_feature_armv87a, ///< Enable ARMv8.7a instructions halide_target_feature_armv88a, ///< Enable ARMv8.8a instructions halide_target_feature_armv89a, ///< Enable ARMv8.9a instructions + halide_target_feature_arm64e, ///< Enable ARM64e (requires ARMv8.3a) halide_target_feature_sanitizer_coverage, ///< Enable hooks for SanitizerCoverage support. halide_target_feature_profile_by_timer, ///< Alternative to halide_target_feature_profile using timer interrupt for systems without threads or applicartions that need to avoid them. halide_target_feature_spirv, ///< Enable SPIR-V code generation support. diff --git a/test/correctness/cross_compilation.cpp b/test/correctness/cross_compilation.cpp index fc18ce51f81b..62d1df6bd5fd 100644 --- a/test/correctness/cross_compilation.cpp +++ b/test/correctness/cross_compilation.cpp @@ -31,6 +31,7 @@ int main(int argc, char **argv) { "arm-64-ios-armv87a", "arm-64-ios-armv88a", "arm-64-ios-armv89a", + "arm-64-ios-arm64e", "arm-64-linux", "arm-64-noos-semihosting", "arm-64-windows", diff --git a/test/correctness/simd_op_check.h b/test/correctness/simd_op_check.h index 25b641800987..e43551c702fa 100644 --- a/test/correctness/simd_op_check.h +++ b/test/correctness/simd_op_check.h @@ -130,6 +130,7 @@ class SimdOpCheckTest { Target::ARMv87a, Target::ARMv88a, Target::ARMv89a, + Target::ARM64e, Target::AVX, Target::AVX2, Target::AVX512,