diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml index 457527bcc98c..c83cff317555 100644 --- a/.github/workflows/pip.yml +++ b/.github/workflows/pip.yml @@ -12,7 +12,7 @@ on: env: # TODO: detect this from repo somehow: https://github.com/halide/Halide/issues/8406 - LLVM_VERSION: 19.1.0 + LLVM_VERSION: 19.1.4 FLATBUFFERS_VERSION: 23.5.26 WABT_VERSION: 1.0.36 diff --git a/.lldbinit b/.lldbinit new file mode 100644 index 000000000000..a07959c1b025 --- /dev/null +++ b/.lldbinit @@ -0,0 +1 @@ +command script import ./tools/lldbhalide.py diff --git a/README.md b/README.md index dfbf68324742..fd13c0a92f59 100644 --- a/README.md +++ b/README.md @@ -436,3 +436,4 @@ code to Halide: |------------------------------------------|---------------------------------------------------------------------------------------------------------------| | [CMake developer](doc/CodeStyleCMake.md) | Guidelines for authoring new CMake code. | | [FuzzTesting](doc/FuzzTesting.md) | Information about fuzz testing the Halide compiler (rather than pipelines). Intended for internal developers. | +| [Testing](doc/Testing.md) | Information about our test organization and debugging tips. Intended for internal developers. | diff --git a/doc/Testing.md b/doc/Testing.md new file mode 100644 index 000000000000..ac12e0c7b975 --- /dev/null +++ b/doc/Testing.md @@ -0,0 +1,125 @@ +# Testing + +Halide uses CTest as its primary test platform and runner. + +## Organization + +Halide's tests are organized beneath the top-level `test/` directory. These +folders are described below: + +| Folder | Description | +|----------------------|----------------------------------------------------------------------------------| +| `autoschedulers/$AS` | Test for the `$AS` (e.g. 
`adams2019`) autoscheduler | +| `common` | Code that may be shared across multiple tests | +| `correctness` | Tests that check correctness of various compiler properties | +| `error` | Tests that expect an exception to be thrown (or `abort()` to be called) | +| `failing_with_issue` | Correctness tests that are associated with a particular issue on GitHub | +| `fuzz` | Fuzz tests. Read more at [FuzzTesting.md](FuzzTesting.md) | +| `generator` | Tests of Halide's AOT compilation infrastructure. | +| `integration` | Tests of Halide's CMake package for downstream use, including cross compilation. | +| `performance` | Tests that check that certain schedules indeed improve performance. | +| `runtime` | Unit tests for the Halide runtime library | +| `warning` | Tests that expected warnings are indeed issued. | + +The tests in each of these directories are given CTest labels corresponding to +the directory name. Thus, one can use `ctest -L generator` to run only the +`generator` tests. The `performance` tests configure CTest to not run them +concurrently with other tests (including each other). + +The vast majority of our tests are simple C++ executables that link to Halide, +perform some checks, and print the special line `Success!` upon successful +completion. There are three main exceptions to this: + +First, the `warning` tests are expected to print a line that reads +`Warning:` and are not checked for `Success!`. + +Second, some tests cannot run in all scenarios; for example, a test that +measures CUDA performance requires a CUDA-capable GPU. In these cases, tests are +expected to print `[SKIP]` and exit without printing `Success!` or `Warning:`. + +Finally, the `error` tests are expected to throw an (uncaught) exception that is +not a `Halide::InternalError` (i.e. from a failing `internal_assert`). The logic +for translating uncaught exceptions into successful tests is in +`test/common/expect_abort.cpp`. 
+ +## Debugging with LLDB + +We provide helpers for pretty-printing Halide's IR types in LLDB. The +`.lldbinit` file at the repository root will load automatically if you launch +`lldb` from that directory and your `~/.lldbinit` file contains the line, + +``` +settings set target.load-cwd-lldbinit true +``` + +If you prefer to avoid such global configuration, you can directly load the +helpers with the LLDB command, + +``` +command script import ./tools/lldbhalide.py +``` + +again assuming that the repository root is your current working directory. + +To see the benefit of using these helpers, let us debug `correctness_bounds`: + +``` +$ lldb ./build/test/correctness/correctness_bounds +(lldb) breakpoint set --file bounds.cpp --line 18 +Breakpoint 1: where = correctness_bounds`main + 864 at bounds.cpp:18:12, address = 0x0000000100002054 +(lldb) run +Process 29325 launched: '/Users/areinking/dev/Halide/build/test/correctness/correctness_bounds' (arm64) +Defining function... +Process 29325 stopped +* thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1 + frame #0: 0x0000000100002054 correctness_bounds`main(argc=1, argv=0x000000016fdff160) at bounds.cpp:18:12 + 15 g(x, y) = min(x, y); + 16 h(x, y) = clamp(x + y, 20, 100); + 17 +-> 18 Var xo("xo"), yo("yo"), xi("xi"), yi("yi"); + 19 + 20 Target target = get_jit_target_from_environment(); + 21 if (target.has_gpu_feature()) { +Target 0: (correctness_bounds) stopped. +(lldb) +``` + +Now we can try to inspect the Func `h`. Without the helpers, we see: + +``` +(lldb) v h +(Halide::Func) { + func = { + contents = { + strong = (ptr = 0x0000600002486a20) + weak = nullptr + idx = 0 + } + } + pipeline_ = { + contents = (ptr = 0x0000000000000000) + } +} +``` + +But if we load the helpers and try again, we get much more useful output: + +``` +(lldb) command script import ./tools/lldbhalide.py +(lldb) v h +... lots of output ... 
+``` + +The amount of output here is maybe a bit _too_ much, but we gain the ability to +more narrowly inspect data about the func: + +``` +(lldb) v h.func.init_def.values +... +(std::vector) h.func.init_def.values = size=1 { + [0] = max(min(x + y, 100), 20) +} +``` + +These helpers are particularly useful when using graphical debuggers, such as +the one found in CLion. diff --git a/src/ApplySplit.cpp b/src/ApplySplit.cpp index 48d27b1ffc7a..b6491f063fba 100644 --- a/src/ApplySplit.cpp +++ b/src/ApplySplit.cpp @@ -11,13 +11,14 @@ using std::map; using std::string; using std::vector; -vector apply_split(const Split &split, bool is_update, const string &prefix, +vector apply_split(const Split &split, const string &prefix, map &dim_extent_alignment) { vector result; Expr outer = Variable::make(Int(32), prefix + split.outer); Expr outer_max = Variable::make(Int(32), prefix + split.outer + ".loop_max"); - if (split.is_split()) { + switch (split.split_type) { + case Split::SplitVar: { Expr inner = Variable::make(Int(32), prefix + split.inner); Expr old_max = Variable::make(Int(32), prefix + split.old_var + ".loop_max"); Expr old_min = Variable::make(Int(32), prefix + split.old_var + ".loop_min"); @@ -129,8 +130,8 @@ vector apply_split(const Split &split, bool is_update, const s // Define the original variable as the base value computed above plus the inner loop variable. 
result.emplace_back(old_var_name, base_var + inner, ApplySplitResult::LetStmt); result.emplace_back(base_name, base, ApplySplitResult::LetStmt); - - } else if (split.is_fuse()) { + } break; + case Split::FuseVars: { // Define the inner and outer in terms of the fused var Expr fused = Variable::make(Int(32), prefix + split.old_var); Expr inner_min = Variable::make(Int(32), prefix + split.inner + ".loop_min"); @@ -154,10 +155,12 @@ vector apply_split(const Split &split, bool is_update, const s outer_dim != dim_extent_alignment.end()) { dim_extent_alignment[split.old_var] = inner_dim->second * outer_dim->second; } - } else { - // rename or purify + } break; + case Split::RenameVar: + case Split::PurifyRVar: result.emplace_back(prefix + split.old_var, outer, ApplySplitResult::Substitution); result.emplace_back(prefix + split.old_var, outer, ApplySplitResult::LetStmt); + break; } return result; @@ -173,7 +176,8 @@ vector> compute_loop_bounds_after_split(const Split &spl Expr old_var_extent = Variable::make(Int(32), prefix + split.old_var + ".loop_extent"); Expr old_var_max = Variable::make(Int(32), prefix + split.old_var + ".loop_max"); Expr old_var_min = Variable::make(Int(32), prefix + split.old_var + ".loop_min"); - if (split.is_split()) { + switch (split.split_type) { + case Split::SplitVar: { Expr inner_extent = split.factor; Expr outer_extent = (old_var_max - old_var_min + split.factor) / split.factor; let_stmts.emplace_back(prefix + split.inner + ".loop_min", 0); @@ -182,7 +186,8 @@ vector> compute_loop_bounds_after_split(const Split &spl let_stmts.emplace_back(prefix + split.outer + ".loop_min", 0); let_stmts.emplace_back(prefix + split.outer + ".loop_max", outer_extent - 1); let_stmts.emplace_back(prefix + split.outer + ".loop_extent", outer_extent); - } else if (split.is_fuse()) { + } break; + case Split::FuseVars: { // Define bounds on the fused var using the bounds on the inner and outer Expr inner_extent = Variable::make(Int(32), prefix + split.inner + 
".loop_extent"); Expr outer_extent = Variable::make(Int(32), prefix + split.outer + ".loop_extent"); @@ -190,12 +195,16 @@ vector> compute_loop_bounds_after_split(const Split &spl let_stmts.emplace_back(prefix + split.old_var + ".loop_min", 0); let_stmts.emplace_back(prefix + split.old_var + ".loop_max", fused_extent - 1); let_stmts.emplace_back(prefix + split.old_var + ".loop_extent", fused_extent); - } else if (split.is_rename()) { + } break; + case Split::RenameVar: let_stmts.emplace_back(prefix + split.outer + ".loop_min", old_var_min); let_stmts.emplace_back(prefix + split.outer + ".loop_max", old_var_max); let_stmts.emplace_back(prefix + split.outer + ".loop_extent", old_var_extent); + break; + case Split::PurifyRVar: + // Do nothing for purify + break; } - // Do nothing for purify return let_stmts; } diff --git a/src/ApplySplit.h b/src/ApplySplit.h index 5e646b22f08b..b7a81f508ffe 100644 --- a/src/ApplySplit.h +++ b/src/ApplySplit.h @@ -46,31 +46,6 @@ struct ApplySplitResult { ApplySplitResult(Expr val, Type t = Predicate) : name(""), value(std::move(val)), type(t) { } - - bool is_substitution() const { - return (type == Substitution); - } - bool is_substitution_in_calls() const { - return (type == SubstitutionInCalls); - } - bool is_substitution_in_provides() const { - return (type == SubstitutionInProvides); - } - bool is_let() const { - return (type == LetStmt); - } - bool is_predicate() const { - return (type == Predicate); - } - bool is_predicate_calls() const { - return (type == PredicateCalls); - } - bool is_predicate_provides() const { - return (type == PredicateProvides); - } - bool is_blend_provides() const { - return (type == BlendProvides); - } }; /** Given a Split schedule on a definition (init or update), return a list of @@ -79,7 +54,7 @@ struct ApplySplitResult { * defined the values of variables referred by the predicates and substitutions * (ordered from innermost to outermost let). 
*/ std::vector apply_split( - const Split &split, bool is_update, const std::string &prefix, + const Split &split, const std::string &prefix, std::map &dim_extent_alignment); /** Compute the loop bounds of the new dimensions resulting from applying the diff --git a/src/AsyncProducers.cpp b/src/AsyncProducers.cpp index 352219478923..bb5e4279d367 100644 --- a/src/AsyncProducers.cpp +++ b/src/AsyncProducers.cpp @@ -543,20 +543,20 @@ class InitializeSemaphores : public IRMutator { body = LetStmt::make(op->name, std::move(sema_allocate), std::move(body)); // Re-wrap any other lets - for (auto it = lets.rbegin(); it != lets.rend(); it++) { - body = LetStmt::make(it->first, it->second, std::move(body)); + for (const auto &[var, value] : reverse_view(lets)) { + body = LetStmt::make(var, value, std::move(body)); } } } else { body = mutate(frames.back()->body); } - for (auto it = frames.rbegin(); it != frames.rend(); it++) { - Expr value = mutate((*it)->value); - if (value.same_as((*it)->value) && body.same_as((*it)->body)) { - body = *it; + for (const auto *frame : reverse_view(frames)) { + Expr value = mutate(frame->value); + if (value.same_as(frame->value) && body.same_as(frame->body)) { + body = frame; } else { - body = LetStmt::make((*it)->name, std::move(value), std::move(body)); + body = LetStmt::make(frame->name, std::move(value), std::move(body)); } } return body; @@ -654,8 +654,8 @@ class TightenProducerConsumerNodes : public IRMutator { body = make_producer_consumer(name, is_producer, body, scope, uses_vars); } - for (auto it = containing_lets.rbegin(); it != containing_lets.rend(); it++) { - body = LetStmt::make((*it)->name, (*it)->value, body); + for (const auto *container : reverse_view(containing_lets)) { + body = LetStmt::make(container->name, container->value, body); } return body; @@ -846,8 +846,7 @@ class ExpandAcquireNodes : public IRMutator { result = mutate(result); vector> semaphores; - for (auto it = stmts.rbegin(); it != stmts.rend(); it++) { - Stmt 
s = *it; + for (Stmt s : reverse_view(stmts)) { while (const Acquire *a = s.as()) { semaphores.emplace_back(a->semaphore, a->count); s = a->body; @@ -916,8 +915,8 @@ class ExpandAcquireNodes : public IRMutator { } // Rewrap the rest of the lets - for (auto it = frames.rbegin(); it != frames.rend(); it++) { - s = LetStmt::make((*it)->name, (*it)->value, s); + for (const auto *let : reverse_view(frames)) { + s = LetStmt::make(let->name, let->value, s); } return s; diff --git a/src/BoundConstantExtentLoops.cpp b/src/BoundConstantExtentLoops.cpp index d2901854f6eb..fc2fea5b9d41 100644 --- a/src/BoundConstantExtentLoops.cpp +++ b/src/BoundConstantExtentLoops.cpp @@ -61,8 +61,8 @@ class BoundLoops : public IRMutator { if (e == nullptr) { // We're about to hard fail. Get really aggressive // with the simplifier. - for (auto it = lets.rbegin(); it != lets.rend(); it++) { - extent = Let::make(it->first, it->second, extent); + for (const auto &[var, value] : reverse_view(lets)) { + extent = Let::make(var, value, extent); } extent = remove_likelies(extent); extent = substitute_in_all_lets(extent); diff --git a/src/BoundSmallAllocations.cpp b/src/BoundSmallAllocations.cpp index f83a13d99614..a4774dc2d398 100644 --- a/src/BoundSmallAllocations.cpp +++ b/src/BoundSmallAllocations.cpp @@ -38,8 +38,8 @@ class BoundSmallAllocations : public IRMutator { result = mutate(result); - for (auto it = frames.rbegin(); it != frames.rend(); it++) { - result = T::make(it->op->name, it->op->value, result); + for (const auto &frame : reverse_view(frames)) { + result = T::make(frame.op->name, frame.op->value, result); } return result; diff --git a/src/Bounds.cpp b/src/Bounds.cpp index fe72e6bedfdf..4a8c79b0286f 100644 --- a/src/Bounds.cpp +++ b/src/Bounds.cpp @@ -2104,14 +2104,14 @@ class SolveIfThenElse : public IRMutator { Stmt s = mutate(body); if (s.same_as(body)) { - for (auto it = frames.rbegin(); it != frames.rend(); it++) { - pop_var((*it)->name); + for (const auto *frame : 
reverse_view(frames)) { + pop_var(frame->name); } return orig; } else { - for (auto it = frames.rbegin(); it != frames.rend(); it++) { - pop_var((*it)->name); - s = LetStmt::make((*it)->name, (*it)->value, s); + for (const auto *frame : reverse_view(frames)) { + pop_var(frame->name); + s = LetStmt::make(frame->name, frame->value, s); } return s; } @@ -2590,15 +2590,15 @@ class BoxesTouched : public IRGraphVisitor { result.accept(this); - for (auto it = frames.rbegin(); it != frames.rend(); it++) { + for (const auto &frame : reverse_view(frames)) { // Pop the value bounds - scope.pop(it->op->name); + scope.pop(frame.op->name); - if (it->op->value.type() == type_of()) { - buffer_lets.erase(it->op->name); + if (frame.op->value.type() == type_of()) { + buffer_lets.erase(frame.op->name); } - if (!it->min_name.empty()) { + if (!frame.min_name.empty()) { // We made up new names for the bounds of the // value, and need to rewrap any boxes we're // returning with appropriate lets. @@ -2606,19 +2606,19 @@ class BoxesTouched : public IRGraphVisitor { Box &box = i.second; for (size_t i = 0; i < box.size(); i++) { if (box[i].has_lower_bound()) { - if (expr_uses_var(box[i].min, it->max_name)) { - box[i].min = Let::make(it->max_name, it->value_bounds.max, box[i].min); + if (expr_uses_var(box[i].min, frame.max_name)) { + box[i].min = Let::make(frame.max_name, frame.value_bounds.max, box[i].min); } - if (expr_uses_var(box[i].min, it->min_name)) { - box[i].min = Let::make(it->min_name, it->value_bounds.min, box[i].min); + if (expr_uses_var(box[i].min, frame.min_name)) { + box[i].min = Let::make(frame.min_name, frame.value_bounds.min, box[i].min); } } if (box[i].has_upper_bound()) { - if (expr_uses_var(box[i].max, it->max_name)) { - box[i].max = Let::make(it->max_name, it->value_bounds.max, box[i].max); + if (expr_uses_var(box[i].max, frame.max_name)) { + box[i].max = Let::make(frame.max_name, frame.value_bounds.max, box[i].max); } - if (expr_uses_var(box[i].max, it->min_name)) { - 
box[i].max = Let::make(it->min_name, it->value_bounds.min, box[i].max); + if (expr_uses_var(box[i].max, frame.min_name)) { + box[i].max = Let::make(frame.min_name, frame.value_bounds.min, box[i].max); } } } @@ -2626,28 +2626,28 @@ class BoxesTouched : public IRGraphVisitor { } if (is_let_stmt::value) { - let_stmts.pop(it->op->name); + let_stmts.pop(frame.op->name); // If this let stmt shadowed an outer one, we need // to re-insert the children from the previous let // stmt into the map. - if (!it->old_let_vars.empty()) { - internal_assert(it->vi.instance > 0); - VarInstance old_vi = VarInstance(it->vi.var, it->vi.instance - 1); - for (const auto &v : it->old_let_vars) { + if (!frame.old_let_vars.empty()) { + internal_assert(frame.vi.instance > 0); + VarInstance old_vi = VarInstance(frame.vi.var, frame.vi.instance - 1); + for (const auto &v : frame.old_let_vars) { internal_assert(vars_renaming.count(v)); children[get_var_instance(v)].insert(old_vi); } } // Remove the children from the current let stmt. 
- for (const auto &v : it->collect.vars) { + for (const auto &v : frame.collect.vars) { internal_assert(vars_renaming.count(v)); - children[get_var_instance(v)].erase(it->vi); + children[get_var_instance(v)].erase(frame.vi); } } - pop_var(it->op->name); + pop_var(frame.op->name); } } @@ -3151,8 +3151,8 @@ map boxes_touched(const Expr &e, Stmt s, bool consider_calls, bool return s; } else { // Rewrap the lets around the mutated body - for (auto it = frames.rbegin(); it != frames.rend(); it++) { - s = LetStmt::make((*it)->name, (*it)->value, s); + for (const auto *frame : reverse_view(frames)) { + s = LetStmt::make(frame->name, frame->value, s); } return s; } diff --git a/src/BoundsInference.cpp b/src/BoundsInference.cpp index 84f076d94537..724adb993afd 100644 --- a/src/BoundsInference.cpp +++ b/src/BoundsInference.cpp @@ -328,10 +328,9 @@ class BoundsInference : public IRMutator { } } - const vector &specializations = def.specializations(); - for (size_t i = specializations.size(); i > 0; i--) { - Expr s_cond = specializations[i - 1].condition; - const Definition &s_def = specializations[i - 1].definition; + for (const auto &s : reverse_view(def.specializations())) { + const Expr s_cond = s.condition; + const Definition &s_def = s.definition; // Else case (i.e. 
specialization condition is false) for (auto &vec : result) { @@ -1309,12 +1308,11 @@ class BoundsInference : public IRMutator { old_inner_productions.end()); // Rewrap the let/if statements - for (size_t i = wrappers.size(); i > 0; i--) { - const auto &p = wrappers[i - 1]; - if (p.first.empty()) { - body = IfThenElse::make(p.second, body); + for (const auto &[var, value] : reverse_view(wrappers)) { + if (var.empty()) { + body = IfThenElse::make(value, body); } else { - body = LetStmt::make(p.first, p.second, body); + body = LetStmt::make(var, value, body); } } diff --git a/src/CPlusPlusMangle.cpp b/src/CPlusPlusMangle.cpp index 79240083ca1e..9a6322823133 100644 --- a/src/CPlusPlusMangle.cpp +++ b/src/CPlusPlusMangle.cpp @@ -216,14 +216,14 @@ MangledNamePart mangle_inner_name(const Type &type, const Target &target, Previo result.full_name = quals + code + type.handle_type->inner_name.name + "@"; result.with_substitutions = quals + code + prev_decls.check_and_enter_name(type.handle_type->inner_name.name); - for (size_t i = type.handle_type->enclosing_types.size(); i > 0; i--) { - result.full_name += type.handle_type->enclosing_types[i - 1].name + "@"; - result.with_substitutions += prev_decls.check_and_enter_name(type.handle_type->enclosing_types[i - 1].name); + for (const auto &enclosing_type : reverse_view(type.handle_type->enclosing_types)) { + result.full_name += enclosing_type.name + "@"; + result.with_substitutions += prev_decls.check_and_enter_name(enclosing_type.name); } - for (size_t i = type.handle_type->namespaces.size(); i > 0; i--) { - result.full_name += type.handle_type->namespaces[i - 1] + "@"; - result.with_substitutions += prev_decls.check_and_enter_name(type.handle_type->namespaces[i - 1]); + for (const auto &ns : reverse_view(type.handle_type->namespaces)) { + result.full_name += ns + "@"; + result.with_substitutions += prev_decls.check_and_enter_name(ns); } result.full_name += "@"; @@ -288,8 +288,8 @@ std::string 
cplusplus_function_mangled_name(const std::string &name, const std:: PreviousDeclarations prev_decls; result += prev_decls.check_and_enter_name(name); - for (size_t i = namespaces.size(); i > 0; i--) { - result += prev_decls.check_and_enter_name(namespaces[i - 1]); + for (const auto &ns : reverse_view(namespaces)) { + result += prev_decls.check_and_enter_name(ns); } result += "@"; diff --git a/src/CSE.cpp b/src/CSE.cpp index 0905562c4e63..02fb3853e35a 100644 --- a/src/CSE.cpp +++ b/src/CSE.cpp @@ -239,8 +239,8 @@ class CSEEveryExprInStmt : public IRMutator { internal_assert(bundle && bundle->args.size() == 2); Stmt s = Store::make(op->name, bundle->args[0], bundle->args[1], op->param, mutate(op->predicate), op->alignment); - for (auto it = lets.rbegin(); it != lets.rend(); it++) { - s = LetStmt::make(it->first, it->second, s); + for (const auto &[var, value] : reverse_view(lets)) { + s = LetStmt::make(var, value, s); } return s; } @@ -336,13 +336,11 @@ Expr common_subexpression_elimination(const Expr &e_in, bool lift_all) { debug(4) << "With variables " << e << "\n"; // Wrap the final expr in the lets. - for (size_t i = lets.size(); i > 0; i--) { - Expr value = lets[i - 1].second; + for (const auto &[var, value] : reverse_view(lets)) { // Drop this variable as an acceptable replacement for this expr. replacer.erase(value); // Use containing lets in the value. 
- value = replacer.mutate(lets[i - 1].second); - e = Let::make(lets[i - 1].first, value, e); + e = Let::make(var, replacer.mutate(value), e); } debug(4) << "With lets: " << e << "\n"; diff --git a/src/CanonicalizeGPUVars.cpp b/src/CanonicalizeGPUVars.cpp index aef1f55c5577..4e70af965138 100644 --- a/src/CanonicalizeGPUVars.cpp +++ b/src/CanonicalizeGPUVars.cpp @@ -156,13 +156,13 @@ class CanonicalizeGPUVars : public IRMutator { result = mutate(result); - for (auto it = lets.rbegin(); it != lets.rend(); it++) { - std::string name = canonicalize_let(it->first); - if (name != it->first) { + for (const auto &[var, value] : reverse_view(lets)) { + std::string name = canonicalize_let(var); + if (name != var) { Expr new_var = Variable::make(Int(32), name); - result = substitute(it->first, new_var, result); + result = substitute(var, new_var, result); } - result = LetStmt::make(name, it->second, result); + result = LetStmt::make(name, value, result); } return result; diff --git a/src/CodeGen_C.cpp b/src/CodeGen_C.cpp index b8dbf173d43e..1a93f43dc3ce 100644 --- a/src/CodeGen_C.cpp +++ b/src/CodeGen_C.cpp @@ -1123,8 +1123,8 @@ void CodeGen_C::compile(const LoweredFunc &f, const MetadataNameMap &metadata_na if (!namespaces.empty()) { stream << "\n"; - for (size_t i = namespaces.size(); i > 0; i--) { - stream << "} // namespace " << namespaces[i - 1] << "\n"; + for (const auto &ns : reverse_view(namespaces)) { + stream << "} // namespace " << ns << "\n"; } stream << "\n"; } diff --git a/src/CodeGen_LLVM.cpp b/src/CodeGen_LLVM.cpp index 6b6e85bd8b00..e8b65d93df96 100644 --- a/src/CodeGen_LLVM.cpp +++ b/src/CodeGen_LLVM.cpp @@ -3245,8 +3245,7 @@ void CodeGen_LLVM::visit(const Call *op) { // Build the not-already-inited case builder->SetInsertPoint(global_not_inited_bb); llvm::Value *selected_value = nullptr; - for (int i = sub_fns.size() - 1; i >= 0; i--) { - const auto &sub_fn = sub_fns[i]; + for (const auto &sub_fn : reverse_view(sub_fns)) { if (!selected_value) { 
selected_value = sub_fn.fn_ptr; } else { diff --git a/src/CodeGen_PyTorch.cpp b/src/CodeGen_PyTorch.cpp index e2546fb3e9e8..ed5bdcf6a6c7 100644 --- a/src/CodeGen_PyTorch.cpp +++ b/src/CodeGen_PyTorch.cpp @@ -210,8 +210,8 @@ void CodeGen_PyTorch::compile(const LoweredFunc &f, bool is_cuda) { if (!namespaces.empty()) { stream << "\n"; - for (size_t i = namespaces.size(); i > 0; i--) { - stream << "} // namespace " << namespaces[i - 1] << "\n"; + for (const auto &ns : reverse_view(namespaces)) { + stream << "} // namespace " << ns << "\n"; } stream << "\n"; } diff --git a/src/Deinterleave.cpp b/src/Deinterleave.cpp index cf8652395bb7..5269605d3eb7 100644 --- a/src/Deinterleave.cpp +++ b/src/Deinterleave.cpp @@ -481,21 +481,21 @@ class Interleaver : public IRMutator { result = mutate(result); - for (auto it = frames.rbegin(); it != frames.rend(); it++) { - Expr value = std::move(it->new_value); + for (const auto &frame : reverse_view(frames)) { + Expr value = std::move(frame.new_value); - result = T::make(it->op->name, value, result); + result = T::make(frame.op->name, value, result); // For vector lets, we may additionally need a let defining the even and odd lanes only if (value.type().is_vector()) { if (value.type().lanes() % 2 == 0) { - result = T::make(it->op->name + ".even_lanes", extract_even_lanes(value, vector_lets), result); - result = T::make(it->op->name + ".odd_lanes", extract_odd_lanes(value, vector_lets), result); + result = T::make(frame.op->name + ".even_lanes", extract_even_lanes(value, vector_lets), result); + result = T::make(frame.op->name + ".odd_lanes", extract_odd_lanes(value, vector_lets), result); } if (value.type().lanes() % 3 == 0) { - result = T::make(it->op->name + ".lanes_0_of_3", extract_mod3_lanes(value, 0, vector_lets), result); - result = T::make(it->op->name + ".lanes_1_of_3", extract_mod3_lanes(value, 1, vector_lets), result); - result = T::make(it->op->name + ".lanes_2_of_3", extract_mod3_lanes(value, 2, vector_lets), result); + 
result = T::make(frame.op->name + ".lanes_0_of_3", extract_mod3_lanes(value, 0, vector_lets), result); + result = T::make(frame.op->name + ".lanes_1_of_3", extract_mod3_lanes(value, 1, vector_lets), result); + result = T::make(frame.op->name + ".lanes_2_of_3", extract_mod3_lanes(value, 2, vector_lets), result); } } } diff --git a/src/Derivative.cpp b/src/Derivative.cpp index d4f7fa1a2492..2520d27e290f 100644 --- a/src/Derivative.cpp +++ b/src/Derivative.cpp @@ -353,13 +353,12 @@ void ReverseAccumulationVisitor::propagate_adjoints( expr_adjoints[output_expr] = 1.f; } - // Traverse the expressions in reverse order - for (auto it = expr_list.rbegin(); it != expr_list.rend(); it++) { - if (it->type().is_handle()) { + for (Expr &e : reverse_view(expr_list)) { + if (e.type().is_handle()) { // Ignore pointer types continue; } - it->accept(this); + e.accept(this); } auto error = [&]() { @@ -554,8 +553,7 @@ void ReverseAccumulationVisitor::propagate_adjoints( } // Traverse functions from producers to consumers for reverse accumulation - for (int func_id = funcs.size() - 1; func_id >= 0; func_id--) { - const Func &func = funcs[func_id]; + for (const auto &func : reverse_view(funcs)) { current_func = func; FuncKey func_key{func.name(), func.num_update_definitions() - 1}; @@ -701,14 +699,13 @@ void ReverseAccumulationVisitor::propagate_adjoints( } } - // Traverse the expressions in reverse order - for (auto it = expr_list.rbegin(); it != expr_list.rend(); it++) { - if (it->type().is_handle()) { + for (Expr &e : reverse_view(expr_list)) { + if (e.type().is_handle()) { // Ignore pointer types continue; } // Propagate adjoints - it->accept(this); + e.accept(this); } } if (is_current_non_overwriting_scan) { @@ -743,14 +740,13 @@ void ReverseAccumulationVisitor::propagate_adjoints( update_args, i); } - // Traverse the expressions in reverse order - for (auto it = expr_list.rbegin(); it != expr_list.rend(); it++) { - if (it->type().is_handle()) { + for (Expr &e : 
reverse_view(expr_list)) { + if (e.type().is_handle()) { // Ignore pointer types continue; } // Propagate adjoints - it->accept(this); + e.accept(this); } } } diff --git a/src/DerivativeUtils.cpp b/src/DerivativeUtils.cpp index 86f5902017ff..1a3f18a194a7 100644 --- a/src/DerivativeUtils.cpp +++ b/src/DerivativeUtils.cpp @@ -250,11 +250,11 @@ map inference_bounds(const vector &funcs, bounds[func.name()] = output_bounds[i]; } // Traverse from the consumers to the producers - for (auto it = order.rbegin(); it != order.rend(); it++) { - Func func = Func(env[*it]); + for (const auto &func_name : reverse_view(order)) { + auto func = Func(env[func_name]); // We should already have the bounds of this function - internal_assert(bounds.find(*it) != bounds.end()) << *it << "\n"; - const Box &current_bounds = bounds[*it]; + internal_assert(bounds.find(func_name) != bounds.end()) << func_name << "\n"; + const Box &current_bounds = bounds[func_name]; internal_assert(func.args().size() == current_bounds.size()); // We know the range for each argument of this function for (int i = 0; i < (int)current_bounds.size(); i++) { diff --git a/src/Func.cpp b/src/Func.cpp index 65113ac2ddc5..c243e6950f3f 100644 --- a/src/Func.cpp +++ b/src/Func.cpp @@ -416,47 +416,57 @@ void check_for_race_conditions_in_split_with_blend(const StageSchedule &sched) { // Process the splits in reverse order to figure out which root vars have a // parallel child. 
- for (auto it = sched.splits().rbegin(); it != sched.splits().rend(); it++) { - if (it->is_fuse()) { - if (parallel.count(it->old_var)) { - parallel.insert(it->inner); - parallel.insert(it->old_var); + for (const auto &split : reverse_view(sched.splits())) { + switch (split.split_type) { + case Split::FuseVars: + if (parallel.count(split.old_var)) { + parallel.insert(split.inner); + parallel.insert(split.old_var); } - } else if (it->is_rename() || it->is_purify()) { - if (parallel.count(it->outer)) { - parallel.insert(it->old_var); + break; + case Split::RenameVar: + case Split::PurifyRVar: + if (parallel.count(split.outer)) { + parallel.insert(split.old_var); } - } else { - if (parallel.count(it->inner) || parallel.count(it->outer)) { - parallel.insert(it->old_var); + break; + case Split::SplitVar: + if (parallel.count(split.inner) || parallel.count(split.outer)) { + parallel.insert(split.old_var); } + break; } } // Now propagate back to all children of the identified root vars, to assert // that none of them use a blending tail strategy. 
- for (auto it = sched.splits().begin(); it != sched.splits().end(); it++) { - if (it->is_fuse()) { - if (parallel.count(it->inner) || parallel.count(it->outer)) { - parallel.insert(it->old_var); + for (const auto &split : sched.splits()) { + switch (split.split_type) { + case Split::FuseVars: + if (parallel.count(split.inner) || parallel.count(split.outer)) { + parallel.insert(split.old_var); } - } else if (it->is_rename() || it->is_purify()) { - if (parallel.count(it->old_var)) { - parallel.insert(it->outer); + break; + case Split::RenameVar: + case Split::PurifyRVar: + if (parallel.count(split.old_var)) { + parallel.insert(split.outer); } - } else { - if (parallel.count(it->old_var)) { - parallel.insert(it->inner); - parallel.insert(it->old_var); - if (it->tail == TailStrategy::ShiftInwardsAndBlend || - it->tail == TailStrategy::RoundUpAndBlend) { - user_error << "Tail strategy " << it->tail - << " may not be used to split " << it->old_var + break; + case Split::SplitVar: + if (parallel.count(split.old_var)) { + parallel.insert(split.inner); + parallel.insert(split.old_var); + if (split.tail == TailStrategy::ShiftInwardsAndBlend || + split.tail == TailStrategy::RoundUpAndBlend) { + user_error << "Tail strategy " << split.tail + << " may not be used to split " << split.old_var << " because other vars stemming from the same original " << "Var or RVar are marked as parallel." 
<< "This could cause a race condition.\n"; } } + break; } } } @@ -479,7 +489,7 @@ void Stage::set_dim_type(const VarOrRVar &var, ForType t) { if (!definition.schedule().allow_race_conditions() && definition.schedule().atomic()) { if (!definition.schedule().override_atomic_associativity_test()) { - // We only allow allow associative atomic operations + // We only allow associative atomic operations const string &func_name = function.name(); vector &args = definition.args(); vector &values = definition.values(); @@ -612,16 +622,20 @@ void apply_split_result(const vector> &bounds_let_stmts, vector &values) { for (const auto &res : splits_result) { - if (res.is_substitution() || res.is_let()) { + switch (res.type) { + case ApplySplitResult::Substitution: + case ApplySplitResult::LetStmt: // Apply substitutions to the list of predicates, args, and values. // Make sure we substitute in all the let stmts as well since we are // not going to add them to the exprs. substitute_var_in_exprs(res.name, res.value, predicates); substitute_var_in_exprs(res.name, res.value, args); substitute_var_in_exprs(res.name, res.value, values); - } else { - internal_assert(res.is_predicate()); + break; + default: + internal_assert(res.type == ApplySplitResult::Predicate); predicates.push_back(res.value); + break; } } @@ -640,7 +654,7 @@ void apply_split_result(const vector> &bounds_let_stmts, bool apply_split(const Split &s, vector &rvars, vector &predicates, vector &args, vector &values, map &dim_extent_alignment) { - internal_assert(s.is_split()); + internal_assert(s.split_type == Split::SplitVar); const auto it = std::find_if(rvars.begin(), rvars.end(), [&s](const ReductionVariable &rv) { return (s.old_var == rv.var); }); @@ -659,7 +673,7 @@ bool apply_split(const Split &s, vector &rvars, rvars.insert(it + 1, {s.outer, 0, simplify((old_extent - 1 + s.factor) / s.factor)}); - vector splits_result = apply_split(s, true, "", dim_extent_alignment); + vector splits_result = apply_split(s, "", 
dim_extent_alignment); vector> bounds_let_stmts = compute_loop_bounds_after_split(s, ""); apply_split_result(bounds_let_stmts, splits_result, predicates, args, values); @@ -673,7 +687,7 @@ bool apply_split(const Split &s, vector &rvars, bool apply_fuse(const Split &s, vector &rvars, vector &predicates, vector &args, vector &values, map &dim_extent_alignment) { - internal_assert(s.is_fuse()); + internal_assert(s.split_type == Split::FuseVars); const auto &iter_outer = std::find_if(rvars.begin(), rvars.end(), [&s](const ReductionVariable &rv) { return (s.outer == rv.var); }); const auto &iter_inner = std::find_if(rvars.begin(), rvars.end(), @@ -694,7 +708,7 @@ bool apply_fuse(const Split &s, vector &rvars, iter_outer->extent = extent; rvars.erase(iter_inner); - vector splits_result = apply_split(s, true, "", dim_extent_alignment); + vector splits_result = apply_split(s, "", dim_extent_alignment); vector> bounds_let_stmts = compute_loop_bounds_after_split(s, ""); apply_split_result(bounds_let_stmts, splits_result, predicates, args, values); @@ -710,7 +724,7 @@ bool apply_fuse(const Split &s, vector &rvars, bool apply_purify(const Split &s, vector &rvars, vector &predicates, vector &args, vector &values, map &dim_extent_alignment) { - internal_assert(s.is_purify()); + internal_assert(s.split_type == Split::PurifyRVar); const auto &iter = std::find_if(rvars.begin(), rvars.end(), [&s](const ReductionVariable &rv) { return (s.old_var == rv.var); }); if (iter != rvars.end()) { @@ -718,7 +732,7 @@ bool apply_purify(const Split &s, vector &rvars, << ", deleting it from the rvars list\n"; rvars.erase(iter); - vector splits_result = apply_split(s, true, "", dim_extent_alignment); + vector splits_result = apply_split(s, "", dim_extent_alignment); vector> bounds_let_stmts = compute_loop_bounds_after_split(s, ""); apply_split_result(bounds_let_stmts, splits_result, predicates, args, values); @@ -731,14 +745,14 @@ bool apply_purify(const Split &s, vector &rvars, bool 
apply_rename(const Split &s, vector &rvars, vector &predicates, vector &args, vector &values, map &dim_extent_alignment) { - internal_assert(s.is_rename()); + internal_assert(s.split_type == Split::RenameVar); const auto &iter = std::find_if(rvars.begin(), rvars.end(), [&s](const ReductionVariable &rv) { return (s.old_var == rv.var); }); if (iter != rvars.end()) { debug(4) << " Renaming " << iter->var << " into " << s.outer << "\n"; iter->var = s.outer; - vector splits_result = apply_split(s, true, "", dim_extent_alignment); + vector splits_result = apply_split(s, "", dim_extent_alignment); vector> bounds_let_stmts = compute_loop_bounds_after_split(s, ""); apply_split_result(bounds_let_stmts, splits_result, predicates, args, values); @@ -765,14 +779,19 @@ bool apply_split_directive(const Split &s, vector &rvars, } bool found = false; - if (s.is_split()) { + switch (s.split_type) { + case Split::SplitVar: found = apply_split(s, rvars, predicates, args, values, dim_extent_alignment); - } else if (s.is_fuse()) { + break; + case Split::FuseVars: found = apply_fuse(s, rvars, predicates, args, values, dim_extent_alignment); - } else if (s.is_purify()) { + break; + case Split::PurifyRVar: found = apply_purify(s, rvars, predicates, args, values, dim_extent_alignment); - } else { + break; + case Split::RenameVar: found = apply_rename(s, rvars, predicates, args, values, dim_extent_alignment); + break; } if (found) { @@ -1173,19 +1192,24 @@ void Stage::split(const string &old, const string &outer, const string &inner, c // factor does not divide the outer split factor. 
std::set inner_vars; for (const Split &s : definition.schedule().splits()) { - if (s.is_split()) { + switch (s.split_type) { + case Split::SplitVar: inner_vars.insert(s.inner); if (inner_vars.count(s.old_var)) { inner_vars.insert(s.outer); } - } else if (s.is_rename() || s.is_purify()) { + break; + case Split::RenameVar: + case Split::PurifyRVar: if (inner_vars.count(s.old_var)) { inner_vars.insert(s.outer); } - } else if (s.is_fuse()) { + break; + case Split::FuseVars: if (inner_vars.count(s.inner) || inner_vars.count(s.outer)) { inner_vars.insert(s.old_var); } + break; } } round_up_ok = !inner_vars.count(old_name); @@ -1203,19 +1227,24 @@ void Stage::split(const string &old, const string &outer, const string &inner, c // is OK. Otherwise we can't prove it's safe. std::set inner_vars; for (const Split &s : definition.schedule().splits()) { - if (s.is_split()) { + switch (s.split_type) { + case Split::SplitVar: inner_vars.insert(s.inner); if (inner_vars.count(s.old_var)) { inner_vars.insert(s.outer); } - } else if (s.is_rename() || s.is_purify()) { + break; + case Split::RenameVar: + case Split::PurifyRVar: if (inner_vars.count(s.old_var)) { inner_vars.insert(s.outer); } - } else if (s.is_fuse()) { + break; + case Split::FuseVars: if (inner_vars.count(s.inner) || inner_vars.count(s.outer)) { inner_vars.insert(s.old_var); } + break; } } predicate_loads_ok = !inner_vars.count(old_name); @@ -1258,14 +1287,24 @@ void Stage::split(const string &old, const string &outer, const string &inner, c std::map descends_from_shiftinwards_outer; for (const Split &s : definition.schedule().splits()) { auto it = descends_from_shiftinwards_outer.find(s.old_var); - if (s.is_split() && s.tail == TailStrategy::ShiftInwards) { - descends_from_shiftinwards_outer[s.outer] = s.factor; - } else if (s.is_split() && it != descends_from_shiftinwards_outer.end()) { - descends_from_shiftinwards_outer[s.inner] = it->second; - descends_from_shiftinwards_outer[s.outer] = it->second; - } else if 
((s.is_rename() || s.is_purify()) && - it != descends_from_shiftinwards_outer.end()) { - descends_from_shiftinwards_outer[s.outer] = it->second; + switch (s.split_type) { + case Split::SplitVar: + if (s.tail == TailStrategy::ShiftInwards) { + descends_from_shiftinwards_outer[s.outer] = s.factor; + } else if (it != descends_from_shiftinwards_outer.end()) { + descends_from_shiftinwards_outer[s.inner] = it->second; + descends_from_shiftinwards_outer[s.outer] = it->second; + } + break; + case Split::RenameVar: + case Split::PurifyRVar: + if (it != descends_from_shiftinwards_outer.end()) { + descends_from_shiftinwards_outer[s.outer] = it->second; + } + break; + case Split::FuseVars: + // Do nothing + break; } } auto it = descends_from_shiftinwards_outer.find(old_name); @@ -1522,59 +1561,64 @@ void Stage::remove(const string &var) { vector &splits = schedule.splits(); vector temp; - for (size_t i = splits.size(); i > 0; i--) { + for (const auto &split : reverse_view(splits)) { bool is_removed = false; - if (splits[i - 1].is_fuse()) { - debug(4) << " checking fuse " << splits[i - 1].inner << " and " - << splits[i - 1].inner << " into " << splits[i - 1].old_var << "\n"; - if (splits[i - 1].inner == old_name || - splits[i - 1].outer == old_name) { + switch (split.split_type) { + case Split::FuseVars: + debug(4) << " checking fuse " << split.inner << " and " + << split.inner << " into " << split.old_var << "\n"; + if (split.inner == old_name || + split.outer == old_name) { user_error << "In schedule for " << name() << ", can't remove variable " << old_name << " because it has already been fused into " - << splits[i - 1].old_var << "\n" + << split.old_var << "\n" << dump_argument_list(); } - if (should_remove(splits[i - 1].old_var)) { + if (should_remove(split.old_var)) { is_removed = true; - removed_vars.insert(splits[i - 1].outer); - removed_vars.insert(splits[i - 1].inner); + removed_vars.insert(split.outer); + removed_vars.insert(split.inner); } - } else if (splits[i - 
1].is_split()) { - debug(4) << " splitting " << splits[i - 1].old_var << " into " - << splits[i - 1].outer << " and " << splits[i - 1].inner << "\n"; - if (should_remove(splits[i - 1].inner)) { + break; + case Split::SplitVar: + debug(4) << " splitting " << split.old_var << " into " + << split.outer << " and " << split.inner << "\n"; + if (should_remove(split.inner)) { is_removed = true; - removed_vars.insert(splits[i - 1].old_var); - } else if (should_remove(splits[i - 1].outer)) { + removed_vars.insert(split.old_var); + } else if (should_remove(split.outer)) { is_removed = true; - removed_vars.insert(splits[i - 1].old_var); + removed_vars.insert(split.old_var); } - if (splits[i - 1].old_var == old_name) { + if (split.old_var == old_name) { user_error << "In schedule for " << name() << ", can't remove a variable " << old_name << " because it has already been renamed or split.\n" << dump_argument_list(); } - } else { - debug(4) << " replace/rename " << splits[i - 1].old_var - << " into " << splits[i - 1].outer << "\n"; - if (should_remove(splits[i - 1].outer)) { + break; + case Split::RenameVar: + case Split::PurifyRVar: + debug(4) << " replace/rename " << split.old_var + << " into " << split.outer << "\n"; + if (should_remove(split.outer)) { is_removed = true; - removed_vars.insert(splits[i - 1].old_var); + removed_vars.insert(split.old_var); } - if (splits[i - 1].old_var == old_name) { + if (split.old_var == old_name) { user_error << "In schedule for " << name() << ", can't remove a variable " << old_name << " because it has already been renamed or split.\n" << dump_argument_list(); } + break; } if (!is_removed) { - temp.insert(temp.begin(), splits[i - 1]); + temp.insert(temp.begin(), split); } } splits.swap(temp); @@ -1625,41 +1669,46 @@ Stage &Stage::rename(const VarOrRVar &old_var, const VarOrRVar &new_var) { // If possible, rewrite the split or rename that defines it. 
found = false; - vector &splits = schedule.splits(); - for (size_t i = splits.size(); i > 0; i--) { - if (splits[i - 1].is_fuse()) { - if (splits[i - 1].inner == old_name || - splits[i - 1].outer == old_name) { + for (auto &split : reverse_view(schedule.splits())) { + switch (split.split_type) { + case Split::FuseVars: + if (split.inner == old_name || + split.outer == old_name) { user_error << "In schedule for " << name() << ", can't rename variable " << old_name << " because it has already been fused into " - << splits[i - 1].old_var << "\n" + << split.old_var << "\n" << dump_argument_list(); } - if (splits[i - 1].old_var == old_name) { - splits[i - 1].old_var = new_name; + if (split.old_var == old_name) { + split.old_var = new_name; found = true; break; } - } else { - if (splits[i - 1].inner == old_name) { - splits[i - 1].inner = new_name; + + break; + case Split::SplitVar: + case Split::RenameVar: + case Split::PurifyRVar: + if (split.inner == old_name) { + split.inner = new_name; found = true; break; } - if (splits[i - 1].outer == old_name) { - splits[i - 1].outer = new_name; + if (split.outer == old_name) { + split.outer = new_name; found = true; break; } - if (splits[i - 1].old_var == old_name) { + if (split.old_var == old_name) { user_error << "In schedule for " << name() << ", can't rename a variable " << old_name << " because it has already been renamed or split.\n" << dump_argument_list(); } + break; } } diff --git a/src/Generator.cpp b/src/Generator.cpp index 478b585660a6..fabe1ca5cee2 100644 --- a/src/Generator.cpp +++ b/src/Generator.cpp @@ -576,8 +576,8 @@ void StubEmitter::emit() { stream << get_indent() << "};\n"; stream << "\n"; - for (int i = (int)namespaces.size() - 1; i >= 0; --i) { - stream << get_indent() << "} // namespace " << namespaces[i] << "\n"; + for (const auto &ns : reverse_view(namespaces)) { + stream << get_indent() << "} // namespace " << ns << "\n"; } stream << "\n"; diff --git a/src/IRPrinter.cpp b/src/IRPrinter.cpp index 
a42431f232d0..73ef8652c144 100644 --- a/src/IRPrinter.cpp +++ b/src/IRPrinter.cpp @@ -567,6 +567,11 @@ void IRPrinter::print(const Stmt &ir) { ir.accept(this); } +void IRPrinter::print_summary(const Stmt &ir) { + ScopedValue old(is_summary, true); + ir.accept(this); +} + void IRPrinter::print_list(const std::vector &exprs) { for (size_t i = 0; i < exprs.size(); i++) { print_no_parens(exprs[i]); @@ -865,7 +870,9 @@ void IRPrinter::visit(const Let *op) { stream << "let " << op->name << " = "; print(op->value); stream << " in "; - print(op->body); + if (!is_summary) { + print(op->body); + } close(); } @@ -875,7 +882,9 @@ void IRPrinter::visit(const LetStmt *op) { print_no_parens(op->value); stream << "\n"; - print(op->body); + if (!is_summary) { + print(op->body); + } } void IRPrinter::visit(const AssertStmt *op) { @@ -905,13 +914,9 @@ void IRPrinter::visit(const For *op) { print_no_parens(op->min); stream << ", "; print_no_parens(op->extent); - stream << ") {\n"; + stream << ") "; - indent++; - print(op->body); - indent--; - - stream << get_indent() << "}\n"; + print_braced_stmt(op->body, 1); } void IRPrinter::visit(const Acquire *op) { @@ -919,11 +924,8 @@ void IRPrinter::visit(const Acquire *op) { print_no_parens(op->semaphore); stream << ", "; print_no_parens(op->count); - stream << ") {\n"; - indent++; - print(op->body); - indent--; - stream << get_indent() << "}\n"; + stream << ") "; + print_braced_stmt(op->body, 1); } void IRPrinter::print_lets(const Let *let) { @@ -932,7 +934,9 @@ void IRPrinter::print_lets(const Let *let) { stream << "let " << let->name << " = "; print_no_parens(let->value); stream << " in\n"; - if (const Let *next = let->body.as()) { + if (is_summary) { + stream << get_indent() << "...\n"; + } else if (const Let *next = let->body.as()) { print_lets(next); } else { stream << get_indent(); @@ -941,6 +945,19 @@ void IRPrinter::print_lets(const Let *let) { } } +void IRPrinter::print_braced_stmt(const Stmt &stmt, int extra_indent) { + if 
(is_summary) { + stream << "{ ... }\n"; + return; + } + + stream << "{\n"; + indent += extra_indent; + print(stmt); + indent -= extra_indent; + stream << get_indent() << "}\n"; +} + void IRPrinter::visit(const Store *op) { stream << get_indent(); const bool has_pred = !is_const_one(op->predicate); @@ -1038,7 +1055,10 @@ void IRPrinter::visit(const Allocate *op) { stream << get_indent() << " custom_delete { " << op->free_function << "(" << op->name << "); }"; } stream << "\n"; - print(op->body); + + if (!is_summary) { + print(op->body); + } } void IRPrinter::visit(const Free *op) { @@ -1067,13 +1087,9 @@ void IRPrinter::visit(const Realize *op) { stream << " if "; print(op->condition); } - stream << " {\n"; - - indent++; - print(op->body); - indent--; - stream << get_indent() << "}\n"; + stream << " "; + print_braced_stmt(op->body); } void IRPrinter::visit(const Prefetch *op) { @@ -1102,12 +1118,16 @@ void IRPrinter::visit(const Prefetch *op) { indent--; stream << get_indent() << "}\n"; } - print(op->body); + if (!is_summary) { + print(op->body); + } } void IRPrinter::visit(const Block *op) { - print(op->first); - print(op->rest); + if (!is_summary) { + print(op->first); + print(op->rest); + } } void IRPrinter::visit(const Fork *op) { @@ -1121,14 +1141,23 @@ void IRPrinter::visit(const Fork *op) { stmts.push_back(rest); stream << get_indent() << "fork "; - for (const Stmt &s : stmts) { - stream << "{\n"; - indent++; - print(s); - indent--; - stream << get_indent() << "} "; + if (is_summary) { + stream << "[" << stmts.size(); + if (stmts.size() == 1) { + stream << " child]"; + } else { + stream << " children]"; + } + } else { + for (const Stmt &s : stmts) { + stream << "{\n"; + indent++; + print(s); + indent--; + stream << get_indent() << "} "; + } + stream << "\n"; } - stream << "\n"; } void IRPrinter::visit(const IfThenElse *op) { @@ -1209,32 +1238,43 @@ void IRPrinter::visit(const VectorReduce *op) { } void IRPrinter::visit(const Atomic *op) { + stream << 
get_indent(); + if (op->mutex_name.empty()) { - stream << get_indent() << "atomic (" - << op->producer_name << ") {\n"; + stream << "atomic (" << op->producer_name << ") "; } else { - stream << get_indent() << "atomic (" - << op->producer_name << ", " - << op->mutex_name << ") {\n"; + stream << "atomic (" << op->producer_name << ", " << op->mutex_name << ") "; } - indent += 2; - print(op->body); - indent -= 2; - stream << get_indent() << "}\n"; + + print_braced_stmt(op->body); } void IRPrinter::visit(const HoistedStorage *op) { if (op->name.empty()) { - stream << get_indent() << "hoisted_storage {\n"; + stream << get_indent() << "hoisted_storage "; } else { - stream << get_indent() << "hoisted_storage ("; - stream << op->name; - stream << ") {\n"; + stream << get_indent() << "hoisted_storage (" << op->name << ") "; } - indent += 2; - print(op->body); - indent -= 2; - stream << get_indent() << "}\n"; + + print_braced_stmt(op->body); +} + +std::string lldb_string(const Expr &ir) { + std::stringstream s{}; + IRPrinter p(s); + p.print_no_parens(ir); + return s.str(); +} + +std::string lldb_string(const Internal::BaseExprNode *n) { + return lldb_string(Expr(n)); +} + +std::string lldb_string(const Stmt &ir) { + std::stringstream s{}; + IRPrinter p(s); + p.print_summary(ir); + return s.str(); } } // namespace Internal diff --git a/src/IRPrinter.h b/src/IRPrinter.h index 48afef8603d3..b7b5084c1eff 100644 --- a/src/IRPrinter.h +++ b/src/IRPrinter.h @@ -134,6 +134,9 @@ class IRPrinter : public IRVisitor { /** emit a statement on the output stream */ void print(const Stmt &); + /** emit a statement summary on the output stream */ + void print_summary(const Stmt &); + /** emit a comma delimited list of exprs, without any leading or * trailing punctuation. */ void print_list(const std::vector &exprs); @@ -157,6 +160,10 @@ class IRPrinter : public IRVisitor { * surrounding set of parens. 
*/ bool implicit_parens = false; + /** Print only a summary of a statement, with sub-statements replaced by + * ellipses (...). */ + bool is_summary = false; + /** Either emits "(" or "", depending on the value of implicit_parens */ void open(); @@ -170,6 +177,9 @@ class IRPrinter : public IRVisitor { /** A helper for printing a chain of lets with line breaks */ void print_lets(const Let *let); + /** A helper for printing a braced statement */ + void print_braced_stmt(const Stmt &, int extra_indent = 2); + void visit(const IntImm *) override; void visit(const UIntImm *) override; void visit(const FloatImm *) override; @@ -220,6 +230,13 @@ class IRPrinter : public IRVisitor { void visit(const HoistedStorage *) override; }; +/** Debugging helpers for LLDB */ +/// @{ +std::string lldb_string(const Expr &); +std::string lldb_string(const Internal::BaseExprNode *); +std::string lldb_string(const Stmt &); +/// @} + } // namespace Internal } // namespace Halide diff --git a/src/Inline.cpp b/src/Inline.cpp index 5b5d21462c9e..54399cf77b76 100644 --- a/src/Inline.cpp +++ b/src/Inline.cpp @@ -55,16 +55,19 @@ void validate_schedule_inlined_function(Function f) { } for (const auto &split : stage_s.splits()) { - if (split.is_rename()) { + switch (split.split_type) { + case Split::RenameVar: user_warning << "It is meaningless to rename variable " << split.old_var << " of function " << f.name() << " to " << split.outer << " because " << f.name() << " is scheduled inline.\n"; - } else if (split.is_fuse()) { + break; + case Split::FuseVars: user_warning << "It is meaningless to fuse variables " << split.inner << " and " << split.outer << " because " << f.name() << " is scheduled inline.\n"; - } else { + break; + case Split::SplitVar: user_warning << "It is meaningless to split variable " << split.old_var << " of function " << f.name() << " into " @@ -72,6 +75,10 @@ void validate_schedule_inlined_function(Function f) { << split.factor << " + " << split.inner << " because " << 
f.name() << " is scheduled inline.\n"; + + break; + case Split::PurifyRVar: + break; } } diff --git a/src/LICM.cpp b/src/LICM.cpp index 719b41442cfc..880354d89582 100644 --- a/src/LICM.cpp +++ b/src/LICM.cpp @@ -132,11 +132,11 @@ class LiftLoopInvariants : public IRMutator { result = mutate(result); - for (auto it = frames.rbegin(); it != frames.rend(); it++) { - if (it->new_value.same_as(it->op->value) && result.same_as(it->op->body)) { - result = it->op; + for (const auto &frame : reverse_view(frames)) { + if (frame.new_value.same_as(frame.op->value) && result.same_as(frame.op->body)) { + result = frame.op; } else { - result = T::make(it->op->name, std::move(it->new_value), result); + result = T::make(frame.op->name, std::move(frame.new_value), result); } } @@ -502,11 +502,11 @@ class GroupLoopInvariants : public IRMutator { result = mutate(result); - for (auto it = frames.rbegin(); it != frames.rend(); it++) { - if (it->new_value.same_as(it->op->value) && result.same_as(it->op->body)) { - result = it->op; + for (const auto &frame : reverse_view(frames)) { + if (frame.new_value.same_as(frame.op->value) && result.same_as(frame.op->body)) { + result = frame.op; } else { - result = T::make(it->op->name, it->new_value, result); + result = T::make(frame.op->name, frame.new_value, result); } } diff --git a/src/LoopCarry.cpp b/src/LoopCarry.cpp index 7e9adc960f69..5349e9c316f9 100644 --- a/src/LoopCarry.cpp +++ b/src/LoopCarry.cpp @@ -481,16 +481,14 @@ class LoopCarryOverLoop : public IRMutator { Stmt initial_stores = Block::make(initial_scratch_stores); // Wrap them in the appropriate lets - for (size_t i = initial_lets.size(); i > 0; i--) { - const auto &l = initial_lets[i - 1]; - initial_stores = LetStmt::make(l.first, l.second, initial_stores); + for (const auto &[var, value] : reverse_view(initial_lets)) { + initial_stores = LetStmt::make(var, value, initial_stores); } // We may be lifting the initial stores out of let stmts, // so rewrap them in the necessary 
ones. - for (size_t i = containing_lets.size(); i > 0; i--) { - const auto &l = containing_lets[i - 1]; - if (stmt_uses_var(initial_stores, l.first)) { - initial_stores = LetStmt::make(l.first, l.second, initial_stores); + for (const auto &[var, value] : reverse_view(containing_lets)) { + if (stmt_uses_var(initial_stores, var)) { + initial_stores = LetStmt::make(var, value, initial_stores); } } diff --git a/src/Memoization.cpp b/src/Memoization.cpp index f41ad0c077f2..fe65f397dba2 100644 --- a/src/Memoization.cpp +++ b/src/Memoization.cpp @@ -537,11 +537,7 @@ class RewriteMemoizedAllocations : public IRMutator { Expr value = mutate(let->value); Stmt body = mutate(let->body); - std::vector &allocations = pending_memoized_allocations[innermost_realization_name]; - - for (size_t i = allocations.size(); i > 0; i--) { - const Allocate *allocation = allocations[i - 1]; - + for (const auto *allocation : reverse_view(pending_memoized_allocations[innermost_realization_name])) { // Make the allocation node body = Allocate::make(allocation->name, allocation->type, allocation->memory_type, allocation->extents, allocation->condition, body, Call::make(Handle(), Call::buffer_get_host, diff --git a/src/Prefetch.cpp b/src/Prefetch.cpp index 144b1950c5cd..9bd5dd78d407 100644 --- a/src/Prefetch.cpp +++ b/src/Prefetch.cpp @@ -218,8 +218,7 @@ class InjectPlaceholderPrefetch : public IRMutator { // If there are multiple prefetches of the same Func or ImageParam, // use the most recent one set seen; - for (int i = prefetch_list.size() - 1; i >= 0; --i) { - const PrefetchDirective &p = prefetch_list[i]; + for (const PrefetchDirective &p : reverse_view(prefetch_list)) { if (!ends_with(op->name, "." 
+ p.at) || (seen.find(p.name) != seen.end())) { continue; } @@ -231,9 +230,9 @@ class InjectPlaceholderPrefetch : public IRMutator { // Note that it is not good enough to just prepend use 'prefix + from', as there may be splits involved, e.g., // prefix = g.s0, from = xo, but the var we seek is actually g.s0.x.xo (because 'g' was split at x). string from_var; - for (int j = (int)loop_nest.size() - 1; j >= 0; --j) { - if (starts_with(loop_nest[j], prefix) && ends_with(loop_nest[j], "." + p.from)) { - from_var = loop_nest[j]; + for (const auto &var : reverse_view(loop_nest)) { + if (starts_with(var, prefix) && ends_with(var, "." + p.from)) { + from_var = var; debug(5) << "Prefetch from " << p.from << " -> from_var " << from_var << "\n"; break; } diff --git a/src/RemoveUndef.cpp b/src/RemoveUndef.cpp index 6b0b6fcbf4c0..9103e401fb33 100644 --- a/src/RemoveUndef.cpp +++ b/src/RemoveUndef.cpp @@ -264,15 +264,15 @@ class RemoveUndef : public IRMutator { result = mutate(result); if (result.defined()) { - for (auto it = frames.rbegin(); it != frames.rend(); it++) { - if (!it->new_value.defined()) { + for (const auto &frame : reverse_view(frames)) { + if (!frame.new_value.defined()) { continue; } - predicate = substitute(it->op->name, it->new_value, predicate); - if (it->new_value.same_as(it->op->value) && result.same_as(it->op->body)) { - result = it->op; + predicate = substitute(frame.op->name, frame.new_value, predicate); + if (frame.new_value.same_as(frame.op->value) && result.same_as(frame.op->body)) { + result = frame.op; } else { - result = T::make(it->op->name, std::move(it->new_value), result); + result = T::make(frame.op->name, std::move(frame.new_value), result); } } } @@ -540,13 +540,12 @@ class RemoveUndef : public IRMutator { result = mutate(result); - for (auto it = frames.rbegin(); it != frames.rend(); it++) { - op = it->first; - Stmt new_first = std::move(it->second); + for (const auto &[block, stmt] : reverse_view(frames)) { + Stmt new_first = stmt; if 
(!result.defined()) { result = new_first; - } else if (new_first.same_as(op->first) && result.same_as(op->rest)) { - result = op; + } else if (new_first.same_as(block->first) && result.same_as(block->rest)) { + result = block; } else { result = Block::make(new_first, result); } diff --git a/src/Schedule.h b/src/Schedule.h index f32ce2265a0f..ea2692752a9e 100644 --- a/src/Schedule.h +++ b/src/Schedule.h @@ -348,19 +348,6 @@ struct Split { // If split_type is Fuse, then this does the opposite of a // split, it joins the outer and inner into the old_var. SplitType split_type; - - bool is_rename() const { - return split_type == RenameVar; - } - bool is_split() const { - return split_type == SplitVar; - } - bool is_fuse() const { - return split_type == FuseVars; - } - bool is_purify() const { - return split_type == PurifyRVar; - } }; /** Each Dim below has a dim_type, which tells you what diff --git a/src/ScheduleFunctions.cpp b/src/ScheduleFunctions.cpp index 02b7cc98e7d1..c7a257dd085e 100644 --- a/src/ScheduleFunctions.cpp +++ b/src/ScheduleFunctions.cpp @@ -174,8 +174,7 @@ Stmt build_loop_nest( const string &prefix, int start_fuse, const Function &func, - const Definition &def, - bool is_update) { + const Definition &def) { const auto &dims = func.args(); const auto &func_s = func.schedule(); const auto &stage_s = def.schedule(); @@ -220,28 +219,34 @@ Stmt build_loop_nest( user_assert(predicated_vars.count(split.old_var) == 0) << "Cannot split a loop variable resulting from a split using PredicateLoads or PredicateStores."; - vector splits_result = apply_split(split, is_update, prefix, dim_extent_alignment); + vector splits_result = apply_split(split, prefix, dim_extent_alignment); // To ensure we substitute all indices used in call or provide, // we need to substitute all lets in, so we correctly guard x in // an example like let a = 2*x in a + f[a]. 
stmt = substitute_in_all_lets(stmt); for (const auto &res : splits_result) { - if (res.is_substitution()) { + switch (res.type) { + case ApplySplitResult::Substitution: stmt = graph_substitute(res.name, res.value, stmt); - } else if (res.is_substitution_in_calls()) { + break; + case ApplySplitResult::SubstitutionInCalls: stmt = substitute_in(res.name, res.value, true, false, stmt); - } else if (res.is_substitution_in_provides()) { + break; + case ApplySplitResult::SubstitutionInProvides: stmt = substitute_in(res.name, res.value, false, true, stmt); - } else if (res.is_blend_provides() || - res.is_predicate_calls() || - res.is_predicate_provides()) { + break; + case ApplySplitResult::BlendProvides: + case ApplySplitResult::PredicateCalls: + case ApplySplitResult::PredicateProvides: stmt = add_predicates(res.value, func, res.type, stmt); - } else if (res.is_let()) { + break; + case ApplySplitResult::LetStmt: stmt = LetStmt::make(res.name, res.value, stmt); - } else { - internal_assert(res.is_predicate()); + break; + case ApplySplitResult::Predicate: stmt = IfThenElse::make(res.value, stmt, Stmt()); + break; } } stmt = common_subexpression_elimination(stmt); @@ -253,7 +258,7 @@ Stmt build_loop_nest( // This is not a generic loop invariant code motion step. // In particular there are dangling references to bound // variables that are not defined yet, so we can't rely - // the loop invariant code motion pass. + // on the loop invariant code motion pass. // All containing lets and fors. Outermost first. vector nest; @@ -284,7 +289,7 @@ Stmt build_loop_nest( // Add appropriate predicates on the fused loop vars to ensure we don't // go out of bounds. Ignore the __outermost dims since it's going to be // removed later anyway. These have to be added as outermost as possible as - // some let stmts (e.g. the rebase let stmt) might depend on this vars; + // some let stmts (e.g. 
the rebase let stmt) might depend on these vars; // otherwise, this may mess up the bounds_touched computation. int n_predicates_inner = 0; for (int i = start_fuse; (i >= 0) && (i < (int)stage_s.dims().size() - 1); ++i) { @@ -335,7 +340,7 @@ Stmt build_loop_nest( } // Sort the predicate guards for the fused loops so they are as far outwards - // as possible. IfInnner should not be reordered to outside of a for loop. + // as possible. IfInner should not be reordered to outside a for loop. for (int i = (int)nest.size() - n_predicates_inner - n_predicates; i < (int)nest.size() - n_predicates; i++) { @@ -391,28 +396,26 @@ Stmt build_loop_nest( } // Rewrap the statement in the containing lets and fors. - for (int i = (int)nest.size() - 1; i >= 0; i--) { - if (nest[i].type == Container::Let) { - internal_assert(nest[i].value.defined()); - stmt = LetStmt::make(nest[i].name, nest[i].value, stmt); - } else if ((nest[i].type == Container::If) || (nest[i].type == Container::IfInner)) { - internal_assert(nest[i].value.defined()); - stmt = IfThenElse::make(nest[i].value, stmt, Stmt()); + for (const auto &container : reverse_view(nest)) { + if (container.type == Container::Let) { + internal_assert(container.value.defined()); + stmt = LetStmt::make(container.name, container.value, stmt); + } else if ((container.type == Container::If) || (container.type == Container::IfInner)) { + internal_assert(container.value.defined()); + stmt = IfThenElse::make(container.value, stmt, Stmt()); } else { - internal_assert(nest[i].type == Container::For); - const Dim &dim = stage_s.dims()[nest[i].dim_idx]; - Expr min = Variable::make(Int(32), nest[i].name + ".loop_min"); - Expr extent = Variable::make(Int(32), nest[i].name + ".loop_extent"); - stmt = For::make(nest[i].name, min, extent, dim.for_type, dim.partition_policy, dim.device_api, stmt); + internal_assert(container.type == Container::For); + const Dim &dim = stage_s.dims()[container.dim_idx]; + Expr min = Variable::make(Int(32), 
container.name + ".loop_min"); + Expr extent = Variable::make(Int(32), container.name + ".loop_extent"); + stmt = For::make(container.name, min, extent, dim.for_type, dim.partition_policy, dim.device_api, stmt); } } // Define the bounds on the split dimensions using the bounds // on the function args. If it is a purify, we should use the bounds // from the dims instead. - for (size_t i = splits.size(); i > 0; i--) { - const Split &split = splits[i - 1]; - + for (const Split &split : reverse_view(splits)) { vector> let_stmts = compute_loop_bounds_after_split(split, prefix); for (const auto &let_stmt : let_stmts) { stmt = LetStmt::make(let_stmt.first, let_stmt.second, stmt); @@ -503,7 +506,7 @@ Stmt build_provide_loop_nest(const map &env, } // Default schedule/values if there is no specialization - Stmt stmt = build_loop_nest(body, prefix, start_fuse, func, def, is_update); + Stmt stmt = build_loop_nest(body, prefix, start_fuse, func, def); stmt = inject_placeholder_prefetch(stmt, env, prefix, def.schedule().prefetches()); // Make any specialized copies @@ -847,7 +850,7 @@ Stmt build_extern_produce(const map &env, Function f, const Ta Definition f_def_no_pred = f.definition().get_copy(); f_def_no_pred.predicate() = const_true(); - return build_loop_nest(check, f.name() + ".s0.", -1, f, f_def_no_pred, false); + return build_loop_nest(check, f.name() + ".s0.", -1, f, f_def_no_pred); } // A schedule may include explicit bounds on some dimension. 
This @@ -1310,12 +1313,11 @@ class InjectFunctionRealization : public IRMutator { } // Reinstate the let/if statements - for (size_t i = containers.size(); i > 0; i--) { - const auto &p = containers[i - 1]; - if (p.first.empty()) { - body = IfThenElse::make(p.second, body); + for (const auto &[var, value] : reverse_view(containers)) { + if (var.empty()) { + body = IfThenElse::make(value, body); } else { - body = LetStmt::make(p.first, p.second, body); + body = LetStmt::make(var, value, body); } } @@ -1839,16 +1841,15 @@ class InjectFunctionRealization : public IRMutator { internal_assert(producer.defined()); // Rewrap the loop in the containing lets. - for (size_t i = add_lets.size(); i > 0; --i) { - const auto &b = add_lets[i - 1]; - producer = LetStmt::make(b.first, b.second, producer); + for (const auto &[var, value] : reverse_view(add_lets)) { + producer = LetStmt::make(var, value, producer); } // The original bounds of the loop nests (without any loop-fusion) auto bounds = CollectBounds::collect_bounds(producer); // Compute the shift factors based on the alignment strategies - // starting from the the parent (root loop) to the children. The root + // starting from the parent (root loop) to the children. The root // loop bounds should remain unchanged. map shifts; for (auto i = funcs.size(); i-- > 0;) { @@ -2234,14 +2235,25 @@ bool validate_schedule(Function f, const Stmt &s, const Target &target, bool is_ // (@abadams comments: "I acknowledge that this is gross and should be refactored.") // (Note that the splits are ordered, so a single reverse-pass catches all these cases.) 
- for (auto split = s.splits().rbegin(); split != s.splits().rend(); split++) { - if (split->is_split() && (parallel_vars.count(split->outer) || parallel_vars.count(split->inner))) { - parallel_vars.insert(split->old_var); - } else if (split->is_fuse() && parallel_vars.count(split->old_var)) { - parallel_vars.insert(split->inner); - parallel_vars.insert(split->outer); - } else if ((split->is_rename() || split->is_purify()) && parallel_vars.count(split->outer)) { - parallel_vars.insert(split->old_var); + for (const auto &split : reverse_view(s.splits())) { + switch (split.split_type) { + case Split::SplitVar: + if (parallel_vars.count(split.outer) || parallel_vars.count(split.inner)) { + parallel_vars.insert(split.old_var); + } + break; + case Split::FuseVars: + if (parallel_vars.count(split.old_var)) { + parallel_vars.insert(split.inner); + parallel_vars.insert(split.outer); + } + break; + case Split::RenameVar: + case Split::PurifyRVar: + if (parallel_vars.count(split.outer)) { + parallel_vars.insert(split.old_var); + } + break; } } @@ -2577,8 +2589,7 @@ Stmt schedule_functions(const vector &outputs, validate_fused_groups_schedule(fused_groups, env); - for (size_t i = fused_groups.size(); i > 0; --i) { - const vector &group = fused_groups[i - 1]; + for (const auto &group : reverse_view(fused_groups)) { vector funcs; vector is_output_list; diff --git a/src/SimplifyCorrelatedDifferences.cpp b/src/SimplifyCorrelatedDifferences.cpp index cbc4393646f1..7c6a3bac735c 100644 --- a/src/SimplifyCorrelatedDifferences.cpp +++ b/src/SimplifyCorrelatedDifferences.cpp @@ -131,13 +131,13 @@ class SimplifyCorrelatedDifferences : public IRMutator { result = mutate(result); - for (auto it = frames.rbegin(); it != frames.rend(); it++) { - if (it->new_value.defined()) { - result = LetStmtOrLet::make(it->op->name, it->new_value, result); + for (const auto &frame : reverse_view(frames)) { + if (frame.new_value.defined()) { + result = LetStmtOrLet::make(frame.op->name, frame.new_value, 
result); } else { - result = LetStmtOrLet::make(it->op->name, it->op->value, result); + result = LetStmtOrLet::make(frame.op->name, frame.op->value, result); } - if (it->binding.bound()) { + if (frame.binding.bound()) { lets.pop_back(); } } @@ -210,8 +210,8 @@ class SimplifyCorrelatedDifferences : public IRMutator { std::set vars; track_free_vars(e, &vars); - for (auto it = lets.rbegin(); it != lets.rend(); it++) { - if (!it->may_substitute && vars.count(it->name)) { + for (const auto &[var, value, may_substitute] : reverse_view(lets)) { + if (!may_substitute && vars.count(var)) { // We have to stop here. Can't continue // because there might be an outer let with // the same name that we *can* substitute in, @@ -219,8 +219,8 @@ class SimplifyCorrelatedDifferences : public IRMutator { // value. break; } - track_free_vars(it->value, &vars); - e = Let::make(it->name, it->value, e); + track_free_vars(value, &vars); + e = Let::make(var, value, e); } e = common_subexpression_elimination(e); e = solve_expression(e, loop_var).result; diff --git a/src/SimplifySpecializations.cpp b/src/SimplifySpecializations.cpp index cbf5202a0bdf..0e4e68f88666 100644 --- a/src/SimplifySpecializations.cpp +++ b/src/SimplifySpecializations.cpp @@ -118,9 +118,9 @@ vector propagate_specialization_in_definition(Definition &def, const specializations.insert(specializations.end(), s_def.specializations().begin(), s_def.specializations().end()); } - for (size_t i = specializations.size(); i > 0; i--) { - Expr c = specializations[i - 1].condition; - Definition &s_def = specializations[i - 1].definition; + for (auto &s : reverse_view(specializations)) { + Expr c = s.condition; + Definition &s_def = s.definition; const EQ *eq = c.as(); const Variable *var = eq ? 
eq->a.as() : c.as(); diff --git a/src/Simplify_Let.cpp b/src/Simplify_Let.cpp index e43403dfa8dc..32aa730c6794 100644 --- a/src/Simplify_Let.cpp +++ b/src/Simplify_Let.cpp @@ -280,33 +280,33 @@ Body Simplify::simplify_let(const LetOrLetStmt *op, ExprInfo *info) { } find_var_uses(result, unused_vars); - for (auto it = frames.rbegin(); it != frames.rend(); it++) { - if (it->value_bounds_tracked) { - bounds_and_alignment_info.pop(it->op->name); + for (const auto &frame : reverse_view(frames)) { + if (frame.value_bounds_tracked) { + bounds_and_alignment_info.pop(frame.op->name); } - if (it->new_value_bounds_tracked) { - bounds_and_alignment_info.pop(it->new_name); + if (frame.new_value_bounds_tracked) { + bounds_and_alignment_info.pop(frame.new_name); } - if (it->new_value.defined() && (it->info.new_uses > 0 && !unused_vars.count(it->new_name))) { + if (frame.new_value.defined() && (frame.info.new_uses > 0 && !unused_vars.count(frame.new_name))) { // The new name/value may be used - result = LetOrLetStmt::make(it->new_name, it->new_value, result); - find_var_uses(it->new_value, unused_vars); + result = LetOrLetStmt::make(frame.new_name, frame.new_value, result); + find_var_uses(frame.new_value, unused_vars); } if ((!remove_dead_code && std::is_same::value) || - (it->info.old_uses > 0 && !unused_vars.count(it->op->name))) { + (frame.info.old_uses > 0 && !unused_vars.count(frame.op->name))) { // The old name is still in use. We'd better keep it as well. 
- result = LetOrLetStmt::make(it->op->name, it->value, result); - find_var_uses(it->value, unused_vars); + result = LetOrLetStmt::make(frame.op->name, frame.value, result); + find_var_uses(frame.value, unused_vars); } const LetOrLetStmt *new_op = result.template as(); if (new_op && - new_op->name == it->op->name && - new_op->body.same_as(it->op->body) && - new_op->value.same_as(it->op->value)) { - result = it->op; + new_op->name == frame.op->name && + new_op->body.same_as(frame.op->body) && + new_op->value.same_as(frame.op->value)) { + result = frame.op; } } diff --git a/src/Simplify_Stmts.cpp b/src/Simplify_Stmts.cpp index 6be1ebb070e2..9f0a5ace1158 100644 --- a/src/Simplify_Stmts.cpp +++ b/src/Simplify_Stmts.cpp @@ -422,8 +422,8 @@ Stmt Simplify::visit(const Evaluate *op) { } else { // Rewrap the lets outside the evaluate node Stmt stmt = Evaluate::make(value); - for (size_t i = lets.size(); i > 0; i--) { - stmt = LetStmt::make(lets[i - 1].first, lets[i - 1].second, stmt); + for (const auto &[var, value] : reverse_view(lets)) { + stmt = LetStmt::make(var, value, stmt); } return stmt; } diff --git a/src/SkipStages.cpp b/src/SkipStages.cpp index 3badfc5d77d9..1c84d8c04fba 100644 --- a/src/SkipStages.cpp +++ b/src/SkipStages.cpp @@ -543,10 +543,10 @@ class SkipStages : public IRMutator { } // Rewrap any uninteresting lets - for (auto it = containing_lets.rbegin(); it != containing_lets.rend(); it++) { - mutate(it->second); // Visit the value of each let + for (auto &[var, value] : reverse_view(containing_lets)) { + mutate(value); // Visit the value of each let if (changed) { - body = T::make(it->first, std::move(it->second), std::move(body)); + body = T::make(var, std::move(value), std::move(body)); } } diff --git a/src/SplitTuples.cpp b/src/SplitTuples.cpp index 0f8bea6edf7f..b0c9a378b3ba 100644 --- a/src/SplitTuples.cpp +++ b/src/SplitTuples.cpp @@ -202,9 +202,9 @@ class SplitTuples : public IRMutator { aliases = aliases && (a[i] == b[i]); } // Might need some of 
the containing lets - for (auto it = lets.rbegin(); it != lets.rend(); it++) { - if (expr_uses_var(aliases, it->first)) { - aliases = Let::make(it->first, it->second, aliases); + for (const auto &[var, value] : reverse_view(lets)) { + if (expr_uses_var(aliases, var)) { + aliases = Let::make(var, value, aliases); } } return !can_prove(!aliases); @@ -443,8 +443,8 @@ class SplitScatterGather : public IRMutator { body = substitute(op->name, gather_replacement, body); body = mutate(body); - for (auto it = lets.rbegin(); it != lets.rend(); it++) { - body = LetStmt::make(it->first, it->second, body); + for (const auto &[var, value] : reverse_view(lets)) { + body = LetStmt::make(var, value, body); } return body; @@ -472,8 +472,8 @@ class SplitScatterGather : public IRMutator { body = mutate(body); } - for (auto it = lets.rbegin(); it != lets.rend(); it++) { - body = LetStmt::make(it->first, it->second, body); + for (const auto &[var, value] : reverse_view(lets)) { + body = LetStmt::make(var, value, body); } return body; @@ -536,8 +536,8 @@ class SplitScatterGather : public IRMutator { } } - for (auto it = lets.rbegin(); it != lets.rend(); it++) { - s = LetStmt::make(it->first, it->second, s); + for (const auto &[var, value] : reverse_view(lets)) { + s = LetStmt::make(var, value, s); } return s; diff --git a/src/StageStridedLoads.cpp b/src/StageStridedLoads.cpp index 723fc738ce51..881aee5ad437 100644 --- a/src/StageStridedLoads.cpp +++ b/src/StageStridedLoads.cpp @@ -242,37 +242,37 @@ Stmt stage_strided_loads(const Stmt &s) { // Do the same in reverse to pick up any loads that didn't get // picked up in a cluster, but for whom we know it's safe to do a // dense load before their start. 
- for (auto load = v.rbegin(); load != v.rend(); load++) { - if (replacer.replacements.count({alloc, load->second[0]})) { + for (const auto &[offset, loads] : reverse_view(v)) { + if (replacer.replacements.count({alloc, loads[0]})) { continue; } int64_t delta = k.stride - 1; - const bool can_lift = l.second.upper_bound(load->first - delta) != l.second.begin(); + const bool can_lift = l.second.upper_bound(offset - delta) != l.second.begin(); if (!can_lift) { continue; } int lanes = k.lanes * k.stride; - int64_t first_offset = load->first - delta; + int64_t first_offset = offset - delta; Expr idx = Ramp::make(k.base + (int)first_offset, make_one(k.base.type()), lanes); Type t = k.type.with_lanes(lanes); - const Load *op = load->second[0]; + const Load *op = loads[0]; Expr dense_load = Load::make(t, k.buf, idx, op->image, op->param, const_true(lanes), op->alignment - delta); dense_load = common_subexpression_elimination(dense_load); Expr shuf = Shuffle::make_slice(dense_load, delta, k.stride, k.lanes); - for (const Load *l : load->second) { + for (const Load *l : loads) { replacer.replacements.emplace(std::make_pair(alloc, l), shuf); } } // Look for any loads we can densify because an overlapping load occurs // in any parent scope. 
- for (auto load = v.rbegin(); load != v.rend(); load++) { - if (replacer.replacements.count({alloc, load->second[0]})) { + for (const auto &[offset, loads] : reverse_view(v)) { + if (replacer.replacements.count({alloc, loads[0]})) { continue; } - int64_t min_offset = load->first; - int64_t max_offset = load->first; + int64_t min_offset = offset; + int64_t max_offset = offset; const IRNode *scope = k.scope; while (scope) { const IRNode *parent = finder.parent_scope[scope]; @@ -289,16 +289,16 @@ Stmt stage_strided_loads(const Stmt &s) { if (max_offset - min_offset < k.stride - 1) { continue; } - int64_t offset = std::max(load->first - (k.stride - 1), min_offset); + int64_t final_offset = std::max(offset - (k.stride - 1), min_offset); int lanes = k.lanes * k.stride; - Expr idx = Ramp::make(k.base + (int)offset, make_one(k.base.type()), lanes); + Expr idx = Ramp::make(k.base + (int)final_offset, make_one(k.base.type()), lanes); Type t = k.type.with_lanes(lanes); - const Load *op = load->second[0]; + const Load *op = loads[0]; Expr dense_load = Load::make(t, k.buf, idx, op->image, op->param, const_true(lanes), op->alignment); dense_load = common_subexpression_elimination(dense_load); - Expr shuf = Shuffle::make_slice(dense_load, load->first - offset, k.stride, k.lanes); - for (const Load *l : load->second) { + Expr shuf = Shuffle::make_slice(dense_load, offset - final_offset, k.stride, k.lanes); + for (const Load *l : loads) { replacer.replacements.emplace(std::make_pair(alloc, l), shuf); } } @@ -307,8 +307,8 @@ Stmt stage_strided_loads(const Stmt &s) { // padding the allocation, and densify any remaining strided loads to // external allocations by doing a dense load at a trimmed size. We rely // on codegen to do a good job at loading vectors of a funny size. 
- for (auto load = v.begin(); load != v.end(); load++) { // NOLINT - if (replacer.replacements.count({alloc, load->second[0]})) { + for (const auto &[offset, loads] : v) { + if (replacer.replacements.count({alloc, loads[0]})) { continue; } @@ -323,28 +323,28 @@ Stmt stage_strided_loads(const Stmt &s) { p.first->second = std::max(p.first->second, delta); } - int64_t first_offset = load->first; + int64_t first_offset = offset; Expr idx = Ramp::make(k.base + (int)first_offset, make_one(k.base.type()), lanes); Type t = k.type.with_lanes(lanes); - const Load *op = load->second[0]; + const Load *op = loads[0]; Expr dense_load = Load::make(t, k.buf, idx, op->image, op->param, const_true(lanes), op->alignment); dense_load = common_subexpression_elimination(dense_load); - Expr shuf = Shuffle::make_slice(dense_load, load->first - first_offset, k.stride, k.lanes); - for (const Load *l : load->second) { + Expr shuf = Shuffle::make_slice(dense_load, offset - first_offset, k.stride, k.lanes); + for (const Load *l : loads) { replacer.replacements.emplace(std::make_pair(alloc, l), shuf); } } else if (k.lanes % 2 == 0) { // Do two overlapping half-sized dense loads and mush them together. 
- int64_t first_offset = load->first; + int64_t first_offset = offset; int half_lanes = lanes / 2; internal_assert(delta <= half_lanes); Expr idx1 = Ramp::make(k.base + (int)first_offset, make_one(k.base.type()), half_lanes); Expr idx2 = Ramp::make(k.base + (int)first_offset + half_lanes - delta, make_one(k.base.type()), half_lanes); Type t = k.type.with_lanes(half_lanes); - const Load *op = load->second[0]; + const Load *op = loads[0]; Expr dense_load1 = Load::make(t, k.buf, idx1, op->image, op->param, const_true(half_lanes), op->alignment); Expr dense_load2 = Load::make(t, k.buf, idx2, op->image, op->param, @@ -354,7 +354,7 @@ Stmt stage_strided_loads(const Stmt &s) { Expr shuf1 = Shuffle::make_slice(dense_load1, 0, k.stride, k.lanes / 2); Expr shuf2 = Shuffle::make_slice(dense_load2, delta, k.stride, k.lanes / 2); Expr shuf = Shuffle::make_concat({shuf1, shuf2}); - for (const Load *l : load->second) { + for (const Load *l : loads) { replacer.replacements.emplace(std::make_pair(alloc, l), shuf); } } diff --git a/src/StorageFlattening.cpp b/src/StorageFlattening.cpp index 59278d50fe69..2e82499b96cb 100644 --- a/src/StorageFlattening.cpp +++ b/src/StorageFlattening.cpp @@ -296,9 +296,9 @@ class FlattenDimensions : public IRMutator { auto expand_and_bound = [&](Expr e) { // Iterate from innermost outwards - for (auto it = hoisted_storages.rbegin(); it != hoisted_storages.rend(); it++) { - e = expand_expr(e, it->scope); - if (it->name == op->name) { + for (const auto &storage : reverse_view(hoisted_storages)) { + e = expand_expr(e, storage.scope); + if (storage.name == op->name) { break; } } @@ -307,9 +307,9 @@ class FlattenDimensions : public IRMutator { // Find bounds of expression using the intervals of the loop variables. The loop variables may depend on // the other loop variables, so we just call bounds_of_expr_in_scope for each loop variable separately // in a reverse order. 
- for (auto it = hoisted_storage_data.loop_vars.rbegin(); it != hoisted_storage_data.loop_vars.rend(); ++it) { + for (const auto &[var, interval] : reverse_view(hoisted_storage_data.loop_vars)) { Scope one_loop_var; - one_loop_var.push(it->first, it->second); + one_loop_var.push(var, interval); Interval bounds = bounds_of_expr_in_scope(e, one_loop_var); e = bounds.max; } @@ -538,11 +538,11 @@ class FlattenDimensions : public IRMutator { Expr expanded_min = op->min; Expr expanded_extent = op->extent; // Iterate from innermost outwards - for (auto it = hoisted_storages.rbegin(); it != hoisted_storages.rend(); it++) { - expanded_min = simplify(expand_expr(expanded_min, it->scope)); - expanded_extent = expand_expr(expanded_extent, it->scope); - Interval loop_bounds = Interval(expanded_min, simplify(expanded_min + expanded_extent - 1)); - it->loop_vars.emplace_back(op->name, loop_bounds); + for (auto &storage : reverse_view(hoisted_storages)) { + expanded_min = simplify(expand_expr(expanded_min, storage.scope)); + expanded_extent = expand_expr(expanded_extent, storage.scope); + auto loop_bounds = Interval(expanded_min, simplify(expanded_min + expanded_extent - 1)); + storage.loop_vars.emplace_back(op->name, loop_bounds); } ScopedValue old_in_gpu(in_gpu, in_gpu || is_gpu(op->for_type)); diff --git a/src/Substitute.cpp b/src/Substitute.cpp index c79292a0a2a3..6a7cba7fd589 100644 --- a/src/Substitute.cpp +++ b/src/Substitute.cpp @@ -66,8 +66,8 @@ class Substitute : public IRMutator { new_body.same_as(body)) { return orig; } else { - for (auto it = frames.rbegin(); it != frames.rend(); it++) { - new_body = T::make(it->op->name, it->new_value, new_body); + for (const auto &frame : reverse_view(frames)) { + new_body = T::make(frame.op->name, frame.new_value, new_body); } return new_body; } diff --git a/src/Tracing.cpp b/src/Tracing.cpp index 3a95d04de0c4..2b509e0e55d0 100644 --- a/src/Tracing.cpp +++ b/src/Tracing.cpp @@ -386,10 +386,10 @@ Stmt inject_tracing(Stmt s, const 
string &pipeline_name, bool trace_pipeline, builder.func = trace_tags.first; // func name builder.event = halide_trace_tag; // We must reverse-iterate to preserve order - for (auto it = trace_tags.second.rbegin(); it != trace_tags.second.rend(); ++it) { - user_assert(it->find('\0') == string::npos) + for (const auto &tag : reverse_view(trace_tags.second)) { + user_assert(tag.find('\0') == string::npos) << "add_trace_tag() may not contain the null character."; - builder.trace_tag_expr = Expr(*it); + builder.trace_tag_expr = Expr(tag); s = Block::make(Evaluate::make(builder.build()), s); } } @@ -409,9 +409,8 @@ Stmt inject_tracing(Stmt s, const string &pipeline_name, bool trace_pipeline, Expr space = Expr(" "); std::map bt = boxes_touched(s); - for (auto topo_it = order.rbegin(); topo_it != order.rend(); ++topo_it) { - const string &o = *topo_it; - auto p = tracing.funcs_touched.find(*topo_it); + for (const auto &o : reverse_view(order)) { + auto p = tracing.funcs_touched.find(o); if (p == tracing.funcs_touched.end() && ends_with(o, "_im")) { p = tracing.funcs_touched.find(o.substr(0, o.size() - 3)); } diff --git a/src/TrimNoOps.cpp b/src/TrimNoOps.cpp index bbcf0dd3fdfb..1d7232a89b11 100644 --- a/src/TrimNoOps.cpp +++ b/src/TrimNoOps.cpp @@ -197,43 +197,42 @@ class SimplifyUsingBounds : public IRMutator { // Can we prove a condition over the non-rectangular domain of the for loops we're in? bool provably_true_over_domain(Expr test) { debug(3) << "Attempting to prove: " << test << "\n"; - for (size_t i = containing_loops.size(); i > 0; i--) { + for (const auto &[var, interval] : reverse_view(containing_loops)) { // Because the domain is potentially non-rectangular, we // need to take each variable one-by-one, simplifying in // between to allow for cancellations of the bounds of // inner loops with outer loop variables. 
- auto loop = containing_loops[i - 1]; if (is_const(test)) { break; - } else if (!expr_uses_var(test, loop.var)) { + } else if (!expr_uses_var(test, var)) { continue; - } else if (loop.i.is_bounded() && - can_prove(loop.i.min == loop.i.max) && - expr_uses_var(test, loop.var)) { + } else if (interval.is_bounded() && + can_prove(interval.min == interval.max) && + expr_uses_var(test, var)) { // If min == max then either the domain only has one correct value, which we // can substitute directly. // Need to call CSE here since simplify() is sometimes unable to simplify expr with // non-trivial 'let' value, e.g. (let x = min(10, y-1) in (x < y)) - test = common_subexpression_elimination(Let::make(loop.var, loop.i.min, test)); - } else if (loop.i.is_bounded() && - can_prove(loop.i.min >= loop.i.max) && - expr_uses_var(test, loop.var)) { + test = common_subexpression_elimination(Let::make(var, interval.min, test)); + } else if (interval.is_bounded() && + can_prove(interval.min >= interval.max) && + expr_uses_var(test, var)) { // If min >= max then either the domain only has one correct value, // or the domain is empty, which implies both min/max are true under // the domain. // Need to call CSE here since simplify() is sometimes unable to simplify expr with // non-trivial 'let' value, e.g. (let x = 10 in x < y) || (let x = min(10, y-1) in (x < y)) - test = common_subexpression_elimination(Let::make(loop.var, loop.i.min, test) || - Let::make(loop.var, loop.i.max, test)); + test = common_subexpression_elimination(Let::make(var, interval.min, test) || + Let::make(var, interval.max, test)); } else { Scope s; // Rearrange the expression if possible so that the // loop var only occurs once. 
- SolverResult solved = solve_expression(test, loop.var); + SolverResult solved = solve_expression(test, var); if (solved.fully_solved) { test = solved.result; } - s.push(loop.var, loop.i); + s.push(var, interval); test = and_condition_over_domain(test, s); } test = simplify(test); diff --git a/src/UniquifyVariableNames.cpp b/src/UniquifyVariableNames.cpp index bfd20969f0b6..2d89e77de787 100644 --- a/src/UniquifyVariableNames.cpp +++ b/src/UniquifyVariableNames.cpp @@ -64,14 +64,14 @@ class UniquifyVariableNames : public IRMutator { result = mutate(result); - for (auto it = frames.rbegin(); it != frames.rend(); it++) { - renaming.pop(it->op->name); - if (it->new_name == it->op->name && - result.same_as(it->op->body) && - it->op->value.same_as(it->value)) { - result = it->op; + for (const auto &frame : reverse_view(frames)) { + renaming.pop(frame.op->name); + if (frame.new_name == frame.op->name && + result.same_as(frame.op->body) && + frame.op->value.same_as(frame.value)) { + result = frame.op; } else { - result = LetOrLetStmt::make(it->new_name, it->value, result); + result = LetOrLetStmt::make(frame.new_name, frame.value, result); } } @@ -176,11 +176,11 @@ namespace { void check(vector> in, vector> out) { Stmt in_stmt = Evaluate::make(0), out_stmt = Evaluate::make(0); - for (auto it = in.rbegin(); it != in.rend(); it++) { - in_stmt = LetStmt::make(it->first.name(), it->second, in_stmt); + for (const auto &[var, value] : reverse_view(in)) { + in_stmt = LetStmt::make(var.name(), value, in_stmt); } - for (auto it = out.rbegin(); it != out.rend(); it++) { - out_stmt = LetStmt::make(it->first.name(), it->second, out_stmt); + for (const auto &[var, value] : reverse_view(out)) { + out_stmt = LetStmt::make(var.name(), value, out_stmt); } Stmt s = uniquify_variable_names(in_stmt); diff --git a/src/Util.h b/src/Util.h index 6c0975d42005..54ba79911c7f 100644 --- a/src/Util.h +++ b/src/Util.h @@ -458,6 +458,30 @@ struct IsRoundtrippable { } }; +template +struct 
reverse_adaptor { + T ⦥ +}; + +template +auto begin(reverse_adaptor i) { + return std::rbegin(i.range); +} + +template +auto end(reverse_adaptor i) { + return std::rend(i.range); +} + +/** + * Reverse-order adaptor for range-based for-loops. + * TODO: Replace with std::ranges::reverse_view when upgrading to C++20. + */ +template +reverse_adaptor reverse_view(T &&range) { + return {range}; +} + /** Emit a version of a string that is a valid identifier in C (. is replaced with _) * If prefix_underscore is true (the default), an underscore will be prepended if the * input starts with an alphabetic character to avoid reserved word clashes. diff --git a/src/VectorizeLoops.cpp b/src/VectorizeLoops.cpp index 8bb3096f4c4f..b4c7eb82a920 100644 --- a/src/VectorizeLoops.cpp +++ b/src/VectorizeLoops.cpp @@ -1000,12 +1000,12 @@ class VectorSubs : public IRMutator { body = mutate(body); // Append vectorized lets for this loop level. - for (auto let = containing_lets.rbegin(); let != containing_lets.rend(); let++) { + for (const auto &[var, _] : reverse_view(containing_lets)) { // Skip if this var wasn't vectorized. - if (!scope.contains(let->first)) { + if (!scope.contains(var)) { continue; } - string vectorized_name = get_widened_var_name(let->first); + string vectorized_name = get_widened_var_name(var); Expr vectorized_value = vector_scope.get(vectorized_name); vector_scope.pop(vectorized_name); InterleavedRamp ir; @@ -1310,9 +1310,8 @@ class VectorSubs : public IRMutator { s = SerializeLoops().mutate(s); } // We'll need the original scalar versions of any containing lets. 
- for (size_t i = containing_lets.size(); i > 0; i--) { - const auto &l = containing_lets[i - 1]; - s = LetStmt::make(l.first, l.second, s); + for (const auto &[var, value] : reverse_view(containing_lets)) { + s = LetStmt::make(var, value, s); } for (int ix = vectorized_vars.size() - 1; ix >= 0; ix--) { diff --git a/src/autoschedulers/adams2019/DefaultCostModel.cpp b/src/autoschedulers/adams2019/DefaultCostModel.cpp index cae151d80940..e60e57211bdd 100644 --- a/src/autoschedulers/adams2019/DefaultCostModel.cpp +++ b/src/autoschedulers/adams2019/DefaultCostModel.cpp @@ -62,9 +62,10 @@ void DefaultCostModel::set_pipeline_features(const Internal::Autoscheduler::Func Runtime::Buffer pipeline_features(head1_w, head1_h, num_stages); int stage = 0; for (const auto &n : dag.nodes) { - if (n.is_input) continue; - for (auto it = n.stages.rbegin(); it != n.stages.rend(); it++) { - const auto &s = *it; + if (n.is_input) { + continue; + } + for (const auto &s : Internal::reverse_view(n.stages)) { const int *pipeline_feats = (const int *)(&(s.features)) + 7; // skip the first 7 features for (int i = 0; i < pipeline_feat_size; i++) { @@ -119,9 +120,9 @@ void DefaultCostModel::enqueue(const Internal::Autoscheduler::FunctionDAG &dag, if (stage >= num_stages) break; // Load up the schedule features for all stages of this Func. 
- for (auto it = n.stages.rbegin(); it != n.stages.rend(); it++) { - internal_assert(schedule_feats.contains(&*it)) << n.func.name() << "\n"; - const auto &feat = schedule_feats.get(&*it); + for (const auto &s : Internal::reverse_view(n.stages)) { + internal_assert(schedule_feats.contains(&s)) << n.func.name() << "\n"; + const auto &feat = schedule_feats.get(&s); for (size_t i = 0; i < ScheduleFeatures::num_features(); i++) { schedule_features(i, stage) = feat[i]; } diff --git a/src/autoschedulers/adams2019/State.cpp b/src/autoschedulers/adams2019/State.cpp index 7c4545fae57b..d3cc5ea42aed 100644 --- a/src/autoschedulers/adams2019/State.cpp +++ b/src/autoschedulers/adams2019/State.cpp @@ -585,46 +585,46 @@ void State::apply_schedule(const FunctionDAG &dag, const Adams2019Params ¶ms } } - for (auto &p : state_map) { - if (p.first->node->is_input) { + for (auto &[stage_ptr, schedule] : state_map) { + if (stage_ptr->node->is_input) { continue; } - Stage stage(p.first->stage); + Stage stage(stage_ptr->stage); // Do all the reorders and pick which vars to // parallelize. 
vector vars; vector parallel_vars; bool any_parallel_vars = false, any_parallel_rvars = false; - for (auto it = p.second->vars.rbegin(); it != p.second->vars.rend(); it++) { - if (!it->exists || it->extent == 1) { + for (const auto &func_var : reverse_view(schedule->vars)) { + if (!func_var.exists || func_var.extent == 1) { continue; } - if (!it->parallel) { + if (!func_var.parallel) { break; } - any_parallel_rvars |= it->var.is_rvar; - any_parallel_vars |= !it->var.is_rvar; - parallel_vars.push_back(it->var); + any_parallel_rvars |= func_var.var.is_rvar; + any_parallel_vars |= !func_var.var.is_rvar; + parallel_vars.push_back(func_var.var); } - if (p.second->vars.size() > 1) { - p.second->schedule_source << "\n .reorder("; + if (schedule->vars.size() > 1) { + schedule->schedule_source << "\n .reorder("; bool first = true; - for (auto &v : p.second->vars) { + for (auto &v : schedule->vars) { if (v.exists) { vars.push_back(v.var); if (!first) { - p.second->schedule_source << ", "; + schedule->schedule_source << ", "; } else { - p.second->schedule_source << "{"; + schedule->schedule_source << "{"; } first = false; - p.second->schedule_source << v.var.name(); + schedule->schedule_source << v.var.name(); } } - p.second->schedule_source << "})"; + schedule->schedule_source << "})"; stage.reorder(vars); } @@ -635,44 +635,44 @@ void State::apply_schedule(const FunctionDAG &dag, const Adams2019Params ¶ms for (size_t i = 1; i < parallel_vars.size(); i++) { // Outermost, and next outermost. Preserve the inner // name to not invalidate any compute_ats. 
- p.second->schedule_source << "\n .fuse(" << parallel_vars[i].name() + schedule->schedule_source << "\n .fuse(" << parallel_vars[i].name() << ", " << parallel_vars[i - 1].name() << ", " << parallel_vars[i].name() << ")"; stage.fuse(parallel_vars[i], parallel_vars[i - 1], parallel_vars[i]); } if (!parallel_vars.empty()) { - p.second->schedule_source << "\n .parallel(" << parallel_vars.back().name() << ")"; + schedule->schedule_source << "\n .parallel(" << parallel_vars.back().name() << ")"; stage.parallel(parallel_vars.back()); } } else { for (const auto &v : parallel_vars) { - p.second->schedule_source << "\n .parallel(" << v.name() << ")"; + schedule->schedule_source << "\n .parallel(" << v.name() << ")"; stage.parallel(v); } } // Reorder the vector dimension innermost - if (p.first->index == 0 && p.second->vector_dim > 0) { - vector storage_vars = Func(p.first->node->func).args(); - for (int i = p.second->vector_dim; i > 0; i--) { + if (stage_ptr->index == 0 && schedule->vector_dim > 0) { + vector storage_vars = Func(stage_ptr->node->func).args(); + for (int i = schedule->vector_dim; i > 0; i--) { std::swap(storage_vars[i], storage_vars[i - 1]); } - p.second->schedule_source << "\n .reorder_storage("; + schedule->schedule_source << "\n .reorder_storage("; bool first = true; for (const auto &v : storage_vars) { if (!first) { - p.second->schedule_source << ", "; + schedule->schedule_source << ", "; } first = false; - p.second->schedule_source << v.name(); + schedule->schedule_source << v.name(); } - p.second->schedule_source << ")"; - Func(p.first->node->func).reorder_storage(storage_vars); + schedule->schedule_source << ")"; + Func(stage_ptr->node->func).reorder_storage(storage_vars); } // Dump the schedule source string - src << p.first->name - << p.second->schedule_source.str() + src << stage_ptr->name + << schedule->schedule_source.str() << ";\n"; } // Sanitize the names of things to make them legal source code. 
diff --git a/src/autoschedulers/anderson2021/DefaultCostModel.cpp b/src/autoschedulers/anderson2021/DefaultCostModel.cpp index 51bf21f21780..aad8c9ab3224 100644 --- a/src/autoschedulers/anderson2021/DefaultCostModel.cpp +++ b/src/autoschedulers/anderson2021/DefaultCostModel.cpp @@ -65,8 +65,7 @@ void DefaultCostModel::set_pipeline_features(const Internal::Autoscheduler::Func if (n.is_input) { continue; } - for (auto it = n.stages.rbegin(); it != n.stages.rend(); it++) { - const auto &s = *it; + for (const auto &s : Internal::reverse_view(n.stages)) { const int *pipeline_feats = (const int *)(&(s.features)) + 7; // skip the first 7 features for (int i = 0; i < pipeline_feat_size; i++) { @@ -130,9 +129,9 @@ void DefaultCostModel::enqueue(const Internal::Autoscheduler::FunctionDAG &dag, } // Load up the schedule features for all stages of this Func. - for (auto it = n.stages.rbegin(); it != n.stages.rend(); it++) { - internal_assert(schedule_feats.contains(&*it)) << n.func.name() << "\n"; - const auto &feat = schedule_feats.get(&*it); + for (const auto &s : Internal::reverse_view(n.stages)) { + internal_assert(schedule_feats.contains(&s)) << n.func.name() << "\n"; + const auto &feat = schedule_feats.get(&s); for (size_t i = 0; i < ScheduleFeatures::num_features(); i++) { schedule_features(i, stage) = feat[i]; } diff --git a/src/autoschedulers/anderson2021/State.cpp b/src/autoschedulers/anderson2021/State.cpp index e3d8801ca4e3..063cddf1291b 100644 --- a/src/autoschedulers/anderson2021/State.cpp +++ b/src/autoschedulers/anderson2021/State.cpp @@ -59,28 +59,28 @@ const LoopNest *State::deepest_valid_compute_location(const Anderson2021Params & int64_t new_shared_mem_alloc_size = 0; int64_t new_register_alloc_size = 0; - for (auto it = ancestors.rbegin(); it != ancestors.rend(); it++) { + for (const auto *ancestor : reverse_view(ancestors)) { if (first) { first = false; continue; } - if ((*it)->gpu_label == GPU_parallelism::Block) { + if (ancestor->gpu_label == 
GPU_parallelism::Block) { new_shared_mem_alloc_size = node.bytes_per_point; for (int i = 0; i < node.dimensions; ++i) { - new_shared_mem_alloc_size *= (*it)->get_bounds(&node)->region_computed(i).extent(); + new_shared_mem_alloc_size *= ancestor->get_bounds(&node)->region_computed(i).extent(); } - int64_t total = new_shared_mem_alloc_size + total_shared_mem_alloc_sizes.get((*it)->stage); + int64_t total = new_shared_mem_alloc_size + total_shared_mem_alloc_sizes.get(ancestor->stage); if (total > get_shared_memory_limit(params)) { continue; } } - if ((*it)->gpu_label == GPU_parallelism::Thread || (*it)->gpu_label == GPU_parallelism::Serial) { + if (ancestor->gpu_label == GPU_parallelism::Thread || ancestor->gpu_label == GPU_parallelism::Serial) { int64_t total = node.bytes_per_point; for (int i = 0; i < node.dimensions; ++i) { - total *= (*it)->get_bounds(&node)->region_computed(i).extent(); + total *= ancestor->get_bounds(&node)->region_computed(i).extent(); } if (total > get_register_mem_alloc_limit()) { @@ -93,11 +93,11 @@ const LoopNest *State::deepest_valid_compute_location(const Anderson2021Params & // If the region_computed does not shrink, ancestors.at(i) (the loop // nest one level further in) will never be considered as a compute // location - if (!(*it)->region_computed_shrinks(&node, candidate)) { + if (!ancestor->region_computed_shrinks(&node, candidate)) { break; } - candidate = *it; + candidate = ancestor; } if (candidate->gpu_label == GPU_parallelism::Block) { @@ -216,8 +216,7 @@ void State::FeatureLoopNestMutator::split_compute_root_loops(LoopNest *loop_nest return; } - for (auto it = loop_nest->children.rbegin(); it != loop_nest->children.rend(); ++it) { - auto &c = *it; + for (auto &c : reverse_view(loop_nest->children)) { if (c->gpu_label != GPU_parallelism::None) { continue; } @@ -1094,12 +1093,12 @@ void State::apply_schedule(const FunctionDAG &dag, const Anderson2021Params &par } } - for (auto &p : state_map) { - if (p.first->node->is_input) { 
+ for (auto &[stage_ptr, schedule] : state_map) { + if (stage_ptr->node->is_input) { continue; } - Stage stage(p.first->stage); + Stage stage(stage_ptr->stage); // Do all the reorders and pick which vars to // parallelize. @@ -1108,35 +1107,35 @@ void State::apply_schedule(const FunctionDAG &dag, const Anderson2021Params &par vector parallel_extents; vector constant_extents; bool any_parallel_vars = false, any_parallel_rvars = false; - for (auto it = p.second->vars.rbegin(); it != p.second->vars.rend(); it++) { - if (!it->exists) { + for (const auto &func_var : reverse_view(schedule->vars)) { + if (!func_var.exists) { continue; } - if (!it->parallel) { + if (!func_var.parallel) { break; } - any_parallel_rvars |= it->var.is_rvar; - any_parallel_vars |= !it->var.is_rvar; - parallel_extents.push_back(it->extent); - parallel_vars.push_back(it->var); - constant_extents.push_back(it->constant_extent); + any_parallel_rvars |= func_var.var.is_rvar; + any_parallel_vars |= !func_var.var.is_rvar; + parallel_extents.push_back(func_var.extent); + parallel_vars.push_back(func_var.var); + constant_extents.push_back(func_var.constant_extent); } - if (p.second->vars.size() > 1) { - p.second->schedule_source << "\n .reorder("; + if (schedule->vars.size() > 1) { + schedule->schedule_source << "\n .reorder("; bool first = true; - for (auto &v : p.second->vars) { + for (auto &v : schedule->vars) { if (v.exists) { vars.push_back(v.var); - p.second->ordered_vars.push_back(v); + schedule->ordered_vars.push_back(v); if (!first) { - p.second->schedule_source << ", "; + schedule->schedule_source << ", "; } first = false; - p.second->schedule_source << v.var.name(); + schedule->schedule_source << v.var.name(); } } - p.second->schedule_source << ")"; + schedule->schedule_source << ")"; stage.reorder(vars); } @@ -1144,39 +1143,39 @@ void State::apply_schedule(const FunctionDAG &dag, const Anderson2021Params &par // they are both pure. 
bool can_fuse = !(any_parallel_vars && any_parallel_rvars); if (can_fuse) { - fuse_gpu_blocks(p.second.get(), stage, parallel_vars, parallel_extents, constant_extents); + fuse_gpu_blocks(schedule.get(), stage, parallel_vars, parallel_extents, constant_extents); } else { if (target.has_gpu_feature()) { - mark_gpu_blocks(p.second.get(), stage, parallel_vars, parallel_extents); + mark_gpu_blocks(schedule.get(), stage, parallel_vars, parallel_extents); } else { for (const auto &v : parallel_vars) { - p.second->schedule_source << "\n .parallel(" << v.name() << ")"; + schedule->schedule_source << "\n .parallel(" << v.name() << ")"; stage.parallel(v); } } } if (!parallel_vars.empty()) { - p.second->parallel = true; + schedule->parallel = true; } // Reorder the vector dimension innermost - if (p.first->index == 0 && p.second->vector_dim > 0) { - vector storage_vars = Func(p.first->node->func).args(); - for (int i = p.second->vector_dim; i > 0; i--) { + if (stage_ptr->index == 0 && schedule->vector_dim > 0) { + vector storage_vars = Func(stage_ptr->node->func).args(); + for (int i = schedule->vector_dim; i > 0; i--) { std::swap(storage_vars[i], storage_vars[i - 1]); } - p.second->schedule_source << "\n .reorder_storage("; + schedule->schedule_source << "\n .reorder_storage("; bool first = true; for (const auto &v : storage_vars) { if (!first) { - p.second->schedule_source << ", "; + schedule->schedule_source << ", "; } first = false; - p.second->schedule_source << v.name(); + schedule->schedule_source << v.name(); } - p.second->schedule_source << ")"; - Func(p.first->node->func).reorder_storage(storage_vars); + schedule->schedule_source << ")"; + Func(stage_ptr->node->func).reorder_storage(storage_vars); } } diff --git a/src/autoschedulers/li2018/GradientAutoscheduler.cpp b/src/autoschedulers/li2018/GradientAutoscheduler.cpp index 1f1ad91a1567..fb4477cc951a 100644 --- a/src/autoschedulers/li2018/GradientAutoscheduler.cpp +++ 
b/src/autoschedulers/li2018/GradientAutoscheduler.cpp @@ -908,11 +908,11 @@ void generate_schedule(const std::vector &outputs, std::ostringstream schedule_source; // Traverse from the consumers to the producers - for (auto it = order.rbegin(); it != order.rend(); it++) { - Func func(env[*it]); - debug(1) << "[gradient_autoscheduler] Processing function:" << *it << "\n"; + for (const auto &func_name : reverse_view(order)) { + Func func(env[func_name]); + debug(1) << "[gradient_autoscheduler] Processing function:" << func_name << "\n"; // Get the bounds in integer constant by substitute all the parameters' estimates. - Box bounds = func_bounds[*it]; + Box bounds = func_bounds[func_name]; std::vector int_bounds = get_int_bounds(bounds); // Scheduling pure definition apply_schedule(params, target, func, -1, int_bounds, target.has_gpu_feature(), schedule_source); diff --git a/test/performance/parallel_scenarios.cpp b/test/performance/parallel_scenarios.cpp index 41e590599eb3..2bd29acd6b2f 100644 --- a/test/performance/parallel_scenarios.cpp +++ b/test/performance/parallel_scenarios.cpp @@ -31,32 +31,36 @@ int main(int argc, char **argv) { int native_threads = Halide::Internal::JITSharedRuntime::get_num_threads(); + std::map, std::vector> results; + auto bench = [&](bool m, bool c, int i, int o) { - const int num_samples = 128; const int memory_limit = m ? 
max_memory : 128; + auto now = std::chrono::high_resolution_clock::now; + auto to_ns = [](auto delta) { return 1e9 * std::chrono::duration(delta).count(); }; + auto bench_one = [&]() { - auto t1 = std::chrono::high_resolution_clock::now(); - // Ignore error code because default halide_error() will abort on failure + auto t1 = now(); (void)callable(i, o, memory_limit, in, out); - auto t2 = std::chrono::high_resolution_clock::now(); - return 1e9 * std::chrono::duration(t2 - t1).count() / (i * o); + auto t2 = now(); + return to_ns(t2 - t1) / (i * o); }; - std::vector times(num_samples); + const int num_tasks = 8; + const int min_samples = 32; + + std::vector times[num_tasks]; if (c) { Halide::Tools::ThreadPool thread_pool; - const int num_tasks = 8; - const int samples_per_task = num_samples / num_tasks; Halide::Internal::JITSharedRuntime::set_num_threads(num_tasks * native_threads); std::vector> futures(num_tasks); for (size_t t = 0; t < futures.size(); t++) { futures[t] = thread_pool.async( [&](size_t t) { bench_one(); - for (int s = 0; s < samples_per_task; s++) { - size_t idx = t * samples_per_task + s; - times[idx] = bench_one(); + auto t_start = now(); + while (to_ns(now() - t_start) < 1e7 || times[t].size() < min_samples / num_tasks) { + times[t].push_back(bench_one()); } }, t); @@ -67,32 +71,43 @@ int main(int argc, char **argv) { } else { Halide::Internal::JITSharedRuntime::set_num_threads(native_threads); bench_one(); - for (int s = 0; s < num_samples; s++) { - times[s] = bench_one(); + auto t_start = now(); + while (to_ns(now() - t_start) < 1e7 || times[0].size() < min_samples) { + times[0].push_back(bench_one()); } } - std::sort(times.begin(), times.end()); - printf("%d %d %d %d ", m, c, i, o); - const int n = 8; - int off = (num_samples / n) / 2; - for (int i = 0; i < n; i++) { - printf("%g ", times[off + (num_samples * i) / n]); + + std::vector &r = results[{m, c, i, o}]; + for (int i = 0; i < num_tasks; i++) { + r.insert(r.end(), times[i].begin(), 
times[i].end()); } - printf("\n"); }; // The output is designed to be copy-pasted into a spreadsheet, not read by a human - printf("memory_bound contended inner outer t0 t1 t2 t3 t4 t5 t7\n"); - for (bool contended : {false, true}) { - for (bool memory_bound : {false, true}) { - for (int i : {1 << 0, 1 << 6, 1 << 12, 1 << 18}) { - for (int o : {1, 2, 4, 8, 16, 32, 64, 128, 256}) { - bench(memory_bound, contended, i, o); + printf("memory_bound contended inner outer num_samples 10%% 20%% 30%% 40%% 50%% 60%% 70%% 80%% 90%%\n"); + for (int repeat = 0; repeat < 10; repeat++) { + for (bool contended : {false, true}) { + for (bool memory_bound : {false, true}) { + for (int i : {1 << 6, 1 << 9, 1 << 12, 1 << 15}) { + for (int o : {1, 2, 4, 8, 16, 32, 64, 128, 256}) { + bench(memory_bound, contended, i, o); + } } } } } + for (auto p : results) { + auto &times = p.second; + std::sort(times.begin(), times.end()); + auto [m, c, i, o] = p.first; + printf("%d %d %d %d %d ", m, c, i, o, (int)times.size()); + for (int decile = 10; decile <= 90; decile += 10) { + printf("%g ", times[(decile * times.size()) / 100]); + } + printf("\n"); + } + printf("Success!\n"); return 0; diff --git a/tools/lldbhalide.py b/tools/lldbhalide.py new file mode 100644 index 000000000000..75f199a8dcee --- /dev/null +++ b/tools/lldbhalide.py @@ -0,0 +1,103 @@ +# Load this module into LLDB by running: +# command script import /path/to/Halide/tools/lldbhalide.py +import functools + +import lldb + + +def normalize(raw): + return raw.lstrip('"').rstrip('"').replace(r'\n', ' ').replace(' ', ' ') + + +def summary_string(summary_fn): + @functools.wraps(summary_fn) + def wrapper(value, _): + if value is None or not value.IsValid(): + return "" + + try: + return normalize(summary_fn(value).GetSummary()) + except Exception as e: + return f"" + + return wrapper + + +@summary_string +def call_name(value): + return value.EvaluateExpression("this->name()", lldb.SBExpressionOptions()) + + +@summary_string +def
call_lldb_string(value): + return value.EvaluateExpression(f"Halide::Internal::lldb_string(*this)", lldb.SBExpressionOptions()) + + +class ProxyChildrenProvider: + def __init__(self, valobj, _): + self.inner = valobj + self.update() + + def update(self): + pass + + def num_children(self): + return self.inner.GetNumChildren() + + def get_child_index(self, name): + return self.inner.GetIndexOfChildWithName(name) + + def get_child_at_index(self, index): + return self.inner.GetChildAtIndex(index) + + +class IRChildrenProvider(ProxyChildrenProvider): + def __init__(self, valobj, _): + super().__init__(valobj.GetChildMemberWithName("ptr"), None) + + +class BoxChildrenProvider(IRChildrenProvider): + def __init__(self, valobj, _): + super().__init__(valobj.GetChildMemberWithName("contents"), None) + + +class FunctionChildrenProvider(ProxyChildrenProvider): + def __init__(self, valobj, _): + contents = valobj.EvaluateExpression("*this->contents.get()", lldb.SBExpressionOptions()) + print(contents) + super().__init__(contents, None) + + +def __lldb_init_module(debugger, _): + base_exprs = ["Add", "And", "Broadcast", "Call", "Cast", "Div", "EQ", "GE", "GT", "LE", "LT", "Let", "Load", "Max", + "Min", "Mod", "Mul", "NE", "Not", "Or", "Ramp", "Reinterpret", "Select", "Shuffle", "Sub", "Variable", + "VectorReduce"] + + for ty in base_exprs: + debugger.HandleCommand( + f"type summary add Halide::Internal::{ty} --python-function lldbhalide.call_lldb_string" + ) + + for ty in ('Expr', 'Internal::Stmt'): + debugger.HandleCommand( + f"type summary add Halide::{ty} --python-function lldbhalide.call_lldb_string" + ) + debugger.HandleCommand( + f'type synthetic add Halide::{ty} -l lldbhalide.IRChildrenProvider' + ) + + for ty in ("Definition", "FuncSchedule", "ReductionDomain", "StageSchedule"): + debugger.HandleCommand( + f"type synthetic add Halide::Internal::{ty} -l lldbhalide.BoxChildrenProvider" + ) + + debugger.HandleCommand( + f'type synthetic add Halide::Internal::Function -l 
lldbhalide.FunctionChildrenProvider' + ) + + debugger.HandleCommand("type summary add Halide::Internal::Dim -s '${var.var%S}'") + debugger.HandleCommand("type summary add Halide::RVar --python-function lldbhalide.call_name") + debugger.HandleCommand("type summary add Halide::Var --python-function lldbhalide.call_name") + + debugger.HandleCommand("type summary add halide_type_t -s '${var.code%S} bits=${var.bits%u} lanes=${var.lanes%u}'") + debugger.HandleCommand("type summary add Halide::Internal::RefCount -s ${var.count.Value%S}")